1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
60
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
63
64 /* Return the vectorized type for the given statement. */
65
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
68 {
69 return STMT_VINFO_VECTYPE (stmt_info);
70 }
71
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
76 {
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
81
82 if (!loop_vinfo)
83 return false;
84
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
86
87 return (bb->loop_father == loop->inner);
88 }
89
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
93
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
100 {
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
107
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
111
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 }
115
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
121 {
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
124 }
125
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
131 {
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
134 }
135
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
140 {
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
145 }
146
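/* For illustration (editorial sketch, not part of the original file): a
   typical caller costs NCOPIES copies of a vector statement in the loop
   body with

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   and later hands the cost vector to the target cost model in one batch.  */
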
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
148
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
151 {
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
154 }
155
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
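
/* For example (editorial sketch; the SSA name is made up): with N == 2
   this emits

     vect_dest_5 = VECT_ARRAY[2];

   before *GSI and returns the new SSA name vect_dest_5.  */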
160
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
165 {
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
168
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
175
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
180
181 return vect_name;
182 }
183
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
187
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
192 {
193 tree array_ref;
194 gimple *new_stmt;
195
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
199
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202 }
203
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
207
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
210 {
211 tree mem_ref;
212
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
217 }
218
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
221
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
225 {
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229 }
230
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
232
233 /* Function vect_mark_relevant.
234
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
240 {
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
243
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
248
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
254 {
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
259
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
264
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
270
271 if (live_p && relevant == vect_unused_in_scope)
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
278 }
279
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
284 }
285
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
289
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
292 {
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
297 }
298
299 worklist->safe_push (stmt_info);
300 }
301
302
303 /* Function is_simple_and_all_uses_invariant
304
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
306
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
310 {
311 tree op;
312 ssa_op_iter iter;
313
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
317
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
319 {
320 enum vect_def_type dt = vect_uninitialized_def;
321
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
323 {
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
328 }
329
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
332 }
333 return true;
334 }
335
336 /* Function vect_stmt_relevant_p.
337
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
340
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
345
346 CHECKME: what other side effects would the vectorizer allow? */
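
/* Illustrative example (editorial addition):

     for (i = 0; i < n; i++)
       {
         s = a[i] + b[i];
         c[i] = s;
       }
     ... = s;

   The store to c[i] is relevant because it has a vdef, and the addition
   is live because its final value is used after the loop.  */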
347
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
351 {
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
357
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
360
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
365
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
370 {
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
375 }
376
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
379 {
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
381 {
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
384 {
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
387
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
391
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
396
397 *live_p = true;
398 }
399 }
400 }
401
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
404 {
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
409 }
410
411 return (*live_p || *relevant);
412 }
413
414
415 /* Function exist_non_indexing_operands_for_use_p
416
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
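
/* Illustrative example (editorial addition; SSA names are made up): for
   the store

     a[i_2] = x_1;

   this returns true for USE == x_1 (the stored value needs to be
   vectorized) but false for USE == i_2, which only feeds the address
   computation.  */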
419
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
422 {
423 tree operand;
424
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
430
431 /* STMT has a data_ref. FORNOW this means that it's of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
436
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
440
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
443
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
446 {
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
449 {
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
462 }
463 return false;
464 }
465
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
471
472 if (operand == use)
473 return true;
474
475 return false;
476 }
477
478
479 /*
480 Function process_use.
481
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
489
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
501 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
503
504 Return true if everything is as expected. Return false otherwise. */
505
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
510 {
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
513
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
518
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
523
524 if (!dstmt_vinfo)
525 return opt_result::success ();
526
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
529
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
538 {
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
544 }
545
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
554 {
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
558
559 switch (relevant)
560 {
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
565
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
570
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
575
576 case vect_used_in_scope:
577 break;
578
579 default:
580 gcc_unreachable ();
581 }
582 }
583
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
586 ...
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
592 {
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
596
597 switch (relevant)
598 {
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
604
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
609
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
613
614 default:
615 gcc_unreachable ();
616 }
617 }
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
628 {
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
633 }
634
635
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
638 }
639
640
641 /* Function vect_mark_stmts_to_be_vectorized.
642
643 Not all stmts in the loop need to be vectorized. For example:
644
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
649
650 3. j = j + 1
651
652 Stmts 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
654
655 This pass detects such stmts. */
656
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
659 {
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
668
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
670
671 auto_vec<stmt_vec_info, 64> worklist;
672
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
675 {
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
678 {
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
683
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
686 }
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
688 {
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
695
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
698 }
699 }
700
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
703 {
704 use_operand_p use_p;
705 ssa_op_iter iter;
706
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
711
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
719
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
727
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
729 {
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
739
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
747
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
755
756 default:
757 break;
758 }
759
760 if (is_pattern_stmt_p (stmt_vinfo))
761 {
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
766 {
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
769
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
772 {
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
783 }
784 for (; i < gimple_num_ops (assign); i++)
785 {
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
788 {
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
794 }
795 }
796 }
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
798 {
799 for (i = 0; i < gimple_call_num_args (call); i++)
800 {
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
807 }
808 }
809 }
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
812 {
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
819 }
820
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
822 {
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
830 {
831 if (fatal)
832 *fatal = false;
833 return res;
834 }
835 }
836 } /* while worklist */
837
838 return opt_result::success ();
839 }
840
841 /* Function vect_model_simple_cost.
842
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
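
/* Worked example (editorial sketch): for z_3 = x_1 + 5 with NCOPIES == 2
   and no SLP node, the constant operand adds one scalar_to_vec broadcast
   to the prologue and the operation itself adds two vector_stmt copies to
   the loop body, giving prologue_cost = 1 * scalar_to_vec and
   inside_cost = 2 * vector_stmt in target cost units.  */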
846
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
855 {
856 int inside_cost = 0, prologue_cost = 0;
857
858 gcc_assert (cost_vec != NULL);
859
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
863
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand into a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
872
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
876
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
881 }
882
883
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
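
/* Worked example (editorial sketch): a two-step promotion has PWR == 1, so
   with NCOPIES == 2 the loop below records 2 + 4 = 6 statements in the
   loop body (the count doubles at each step), plus one prologue statement
   per constant/external operand.  */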
891
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
898 {
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
901
902 for (i = 0; i < pwr + 1; i++)
903 {
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
909 }
910
911 /* FORNOW: Assuming maximum 2 args per stmt. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
916
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
921 }
922
923 /* Returns true if the current function returns DECL. */
924
925 static bool
926 cfun_returns (tree decl)
927 {
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
931 {
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
941 do
942 {
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
944 }
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
950 }
951 return false;
952 }
953
954 /* Function vect_model_store_cost
955
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
958
959 static void
960 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
961 vect_memory_access_type memory_access_type,
962 gather_scatter_info *gs_info,
963 dr_alignment_support alignment_support_scheme,
964 int misalignment,
965 vec_load_store_type vls_type, slp_tree slp_node,
966 stmt_vector_for_cost *cost_vec)
967 {
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 stmt_vec_info first_stmt_info = stmt_info;
970 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
971
972 /* ??? Somehow we need to fix this at the callers. */
973 if (slp_node)
974 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
975
976 if (vls_type == VLS_STORE_INVARIANT)
977 {
978 if (!slp_node)
979 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
980 stmt_info, 0, vect_prologue);
981 }
982
983 /* Grouped stores update all elements in the group at once,
984 so we want the DR for the first statement. */
985 if (!slp_node && grouped_access_p)
986 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
987
988 /* True if we should include any once-per-group costs as well as
989 the cost of the statement itself. For SLP we only get called
990 once per group anyhow. */
991 bool first_stmt_p = (first_stmt_info == stmt_info);
992
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
997 if (first_stmt_p
998 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
999 {
1000 /* Uses high and low interleave or shuffle operations for each
1001 needed permute. */
1002 int group_size = DR_GROUP_SIZE (first_stmt_info);
1003 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
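/* For example (editorial note): with NCOPIES == 1 and a group of 4 stores,
   ceil_log2 (4) == 2, so 1 * 2 * 4 == 8 vec_perm statements are costed
   for the interleaving.  */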
1004 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1005 stmt_info, 0, vect_body);
1006
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "vect_model_store_cost: strided group_size = %d .\n",
1010 group_size);
1011 }
1012
1013 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1014 /* Costs of the stores. */
1015 if (memory_access_type == VMAT_ELEMENTWISE
1016 || memory_access_type == VMAT_GATHER_SCATTER)
1017 {
1018 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1019 if (memory_access_type == VMAT_GATHER_SCATTER
1020 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1021 /* For emulated scatter N offset vector element extracts
1022 (we assume the scalar scaling and ptr + offset add is consumed by
1023 the store). */
1024 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1025 vec_to_scalar, stmt_info, 0,
1026 vect_body);
1027 /* N scalar stores plus extracting the elements. */
1028 inside_cost += record_stmt_cost (cost_vec,
1029 ncopies * assumed_nunits,
1030 scalar_store, stmt_info, 0, vect_body);
1031 }
1032 else
1033 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1034 misalignment, &inside_cost, cost_vec);
1035
1036 if (memory_access_type == VMAT_ELEMENTWISE
1037 || memory_access_type == VMAT_STRIDED_SLP
1038 || (memory_access_type == VMAT_GATHER_SCATTER
1039 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1040 {
1041 /* N scalar stores plus extracting the elements. */
1042 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1043 inside_cost += record_stmt_cost (cost_vec,
1044 ncopies * assumed_nunits,
1045 vec_to_scalar, stmt_info, 0, vect_body);
1046 }
1047
1048 /* When vectorizing a store into the function result assign
1049 a penalty if the function returns in a multi-register location.
1050 In this case we assume we'll end up having to spill the
1051 vector result and do piecewise loads as a conservative estimate. */
1052 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1053 if (base
1054 && (TREE_CODE (base) == RESULT_DECL
1055 || (DECL_P (base) && cfun_returns (base)))
1056 && !aggregate_value_p (base, cfun->decl))
1057 {
1058 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1059 /* ??? Handle PARALLEL in some way. */
1060 if (REG_P (reg))
1061 {
1062 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1063 /* Assume that a single reg-reg move is possible and cheap,
1064 do not account for vector to gp register move cost. */
1065 if (nregs > 1)
1066 {
1067 /* Spill. */
1068 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1069 vector_store,
1070 stmt_info, 0, vect_epilogue);
1071 /* Loads. */
1072 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1073 scalar_load,
1074 stmt_info, 0, vect_epilogue);
1075 }
1076 }
1077 }
1078
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_store_cost: inside_cost = %d, "
1082 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 }
1084
1085
1086 /* Calculate cost of DR's memory access. */
1087 void
1088 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1089 dr_alignment_support alignment_support_scheme,
1090 int misalignment,
1091 unsigned int *inside_cost,
1092 stmt_vector_for_cost *body_cost_vec)
1093 {
1094 switch (alignment_support_scheme)
1095 {
1096 case dr_aligned:
1097 {
1098 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1099 vector_store, stmt_info, 0,
1100 vect_body);
1101
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_store_cost: aligned.\n");
1105 break;
1106 }
1107
1108 case dr_unaligned_supported:
1109 {
1110 /* Here, we assign an additional cost for the unaligned store. */
1111 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1112 unaligned_store, stmt_info,
1113 misalignment, vect_body);
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_NOTE, vect_location,
1116 "vect_model_store_cost: unaligned supported by "
1117 "hardware.\n");
1118 break;
1119 }
1120
1121 case dr_unaligned_unsupported:
1122 {
1123 *inside_cost = VECT_MAX_COST;
1124
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1127 "vect_model_store_cost: unsupported access.\n");
1128 break;
1129 }
1130
1131 default:
1132 gcc_unreachable ();
1133 }
1134 }
1135
1136 /* Calculate cost of DR's memory access. */
1137 void
1138 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1139 dr_alignment_support alignment_support_scheme,
1140 int misalignment,
1141 bool add_realign_cost, unsigned int *inside_cost,
1142 unsigned int *prologue_cost,
1143 stmt_vector_for_cost *prologue_cost_vec,
1144 stmt_vector_for_cost *body_cost_vec,
1145 bool record_prologue_costs)
1146 {
1147 switch (alignment_support_scheme)
1148 {
1149 case dr_aligned:
1150 {
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1152 stmt_info, 0, vect_body);
1153
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_NOTE, vect_location,
1156 "vect_model_load_cost: aligned.\n");
1157
1158 break;
1159 }
1160 case dr_unaligned_supported:
1161 {
1162 /* Here, we assign an additional cost for the unaligned load. */
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1164 unaligned_load, stmt_info,
1165 misalignment, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: unaligned supported by "
1170 "hardware.\n");
1171
1172 break;
1173 }
1174 case dr_explicit_realign:
1175 {
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1177 vector_load, stmt_info, 0, vect_body);
1178 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1179 vec_perm, stmt_info, 0, vect_body);
1180
1181 /* FIXME: If the misalignment remains fixed across the iterations of
1182 the containing loop, the following cost should be added to the
1183 prologue costs. */
1184 if (targetm.vectorize.builtin_mask_for_load)
1185 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1186 stmt_info, 0, vect_body);
1187
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: explicit realign\n");
1191
1192 break;
1193 }
1194 case dr_explicit_realign_optimized:
1195 {
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: unaligned software "
1199 "pipelined.\n");
1200
1201 /* Unaligned software pipeline has a load of an address, an initial
1202 load, and possibly a mask operation to "prime" the loop. However,
1203 if this is an access in a group of loads, which provide grouped
1204 access, then the above cost should only be considered for one
1205 access in the group. Inside the loop, there is a load op
1206 and a realignment op. */
1207
1208 if (add_realign_cost && record_prologue_costs)
1209 {
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1213 if (targetm.vectorize.builtin_mask_for_load)
1214 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1215 vector_stmt, stmt_info,
1216 0, vect_prologue);
1217 }
1218
1219 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1220 stmt_info, 0, vect_body);
1221 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1222 stmt_info, 0, vect_body);
1223
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: explicit realign optimized"
1227 "\n");
1228
1229 break;
1230 }
1231
1232 case dr_unaligned_unsupported:
1233 {
1234 *inside_cost = VECT_MAX_COST;
1235
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1238 "vect_model_load_cost: unsupported access.\n");
1239 break;
1240 }
1241
1242 default:
1243 gcc_unreachable ();
1244 }
1245 }
1246
1247 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1248 the loop preheader for the vectorized stmt STMT_VINFO. */
1249
1250 static void
1251 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1252 gimple_stmt_iterator *gsi)
1253 {
1254 if (gsi)
1255 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1256 else
1257 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1258
1259 if (dump_enabled_p ())
1260 dump_printf_loc (MSG_NOTE, vect_location,
1261 "created new init_stmt: %G", new_stmt);
1262 }
1263
1264 /* Function vect_init_vector.
1265
1266 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1267 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1268 vector type, a vector with all elements equal to VAL is created first.
1269 Place the initialization at GSI if it is not NULL. Otherwise, place the
1270 initialization at the loop preheader.
1271 Return the DEF of INIT_STMT.
1272 It will be used in the vectorization of STMT_INFO. */
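
/* For example (editorial sketch; the SSA name is made up): called with
   VAL == 5 and a four-element integer vector TYPE, this emits

     cst_7 = { 5, 5, 5, 5 };

   in the loop preheader (GSI == NULL) and returns cst_7.  */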
1273
1274 tree
1275 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1276 gimple_stmt_iterator *gsi)
1277 {
1278 gimple *init_stmt;
1279 tree new_temp;
1280
1281 /* We abuse this function to push something to an SSA name with the initial value 'val'. */
1282 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1283 {
1284 gcc_assert (VECTOR_TYPE_P (type));
1285 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1286 {
1287 /* Scalar boolean value should be transformed into
1288 all zeros or all ones value before building a vector. */
1289 if (VECTOR_BOOLEAN_TYPE_P (type))
1290 {
1291 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1292 tree false_val = build_zero_cst (TREE_TYPE (type));
1293
1294 if (CONSTANT_CLASS_P (val))
1295 val = integer_zerop (val) ? false_val : true_val;
1296 else
1297 {
1298 new_temp = make_ssa_name (TREE_TYPE (type));
1299 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1300 val, true_val, false_val);
1301 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1302 val = new_temp;
1303 }
1304 }
1305 else
1306 {
1307 gimple_seq stmts = NULL;
1308 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1309 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type), val);
1311 else
1312 /* ??? Condition vectorization expects us to do
1313 promotion of invariant/external defs. */
1314 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1315 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1316 !gsi_end_p (gsi2); )
1317 {
1318 init_stmt = gsi_stmt (gsi2);
1319 gsi_remove (&gsi2, false);
1320 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1321 }
1322 }
1323 }
1324 val = build_vector_from_val (type, val);
1325 }
1326
1327 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1328 init_stmt = gimple_build_assign (new_temp, val);
1329 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1330 return new_temp;
1331 }
1332
1333
1334 /* Function vect_get_vec_defs_for_operand.
1335
1336 OP is an operand in STMT_VINFO. This function returns a vector of
1337 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1338
1339 In the case that OP is an SSA_NAME which is defined in the loop, then
1340 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1341
1342 In case OP is an invariant or constant, a new stmt that creates a vector def
1343 needs to be introduced. VECTYPE may be used to specify a required type for
1344 vector invariant. */
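
/* For example (editorial sketch): for a constant OP with NCOPIES == 2 the
   same broadcast vector def is pushed twice onto VEC_OPRNDS, whereas for
   an OP defined by a statement inside the loop the LHS of each of that
   statement's NCOPIES vectorized copies is pushed instead.  */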
1345
1346 void
1347 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1348 unsigned ncopies,
1349 tree op, vec<tree> *vec_oprnds, tree vectype)
1350 {
1351 gimple *def_stmt;
1352 enum vect_def_type dt;
1353 bool is_simple_use;
1354 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1355
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE, vect_location,
1358 "vect_get_vec_defs_for_operand: %T\n", op);
1359
1360 stmt_vec_info def_stmt_info;
1361 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1362 &def_stmt_info, &def_stmt);
1363 gcc_assert (is_simple_use);
1364 if (def_stmt && dump_enabled_p ())
1365 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1366
1367 vec_oprnds->create (ncopies);
1368 if (dt == vect_constant_def || dt == vect_external_def)
1369 {
1370 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1371 tree vector_type;
1372
1373 if (vectype)
1374 vector_type = vectype;
1375 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1376 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1377 vector_type = truth_type_for (stmt_vectype);
1378 else
1379 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1380
1381 gcc_assert (vector_type);
1382 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1383 while (ncopies--)
1384 vec_oprnds->quick_push (vop);
1385 }
1386 else
1387 {
1388 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1389 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1390 for (unsigned i = 0; i < ncopies; ++i)
1391 vec_oprnds->quick_push (gimple_get_lhs
1392 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1393 }
1394 }
1395
1396
1397 /* Get vectorized definitions for OP0 and OP1. */
1398
1399 void
1400 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1401 unsigned ncopies,
1402 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1403 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1404 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1405 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1406 {
1407 if (slp_node)
1408 {
1409 if (op0)
1410 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1411 if (op1)
1412 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1413 if (op2)
1414 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1415 if (op3)
1416 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1417 }
1418 else
1419 {
1420 if (op0)
1421 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1422 op0, vec_oprnds0, vectype0);
1423 if (op1)
1424 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1425 op1, vec_oprnds1, vectype1);
1426 if (op2)
1427 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1428 op2, vec_oprnds2, vectype2);
1429 if (op3)
1430 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1431 op3, vec_oprnds3, vectype3);
1432 }
1433 }
1434
1435 void
1436 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1437 unsigned ncopies,
1438 tree op0, vec<tree> *vec_oprnds0,
1439 tree op1, vec<tree> *vec_oprnds1,
1440 tree op2, vec<tree> *vec_oprnds2,
1441 tree op3, vec<tree> *vec_oprnds3)
1442 {
1443 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1444 op0, vec_oprnds0, NULL_TREE,
1445 op1, vec_oprnds1, NULL_TREE,
1446 op2, vec_oprnds2, NULL_TREE,
1447 op3, vec_oprnds3, NULL_TREE);
1448 }
1449
1450 /* Helper function called by vect_finish_replace_stmt and
1451 vect_finish_stmt_generation. Set the location of the new
1452 statement and, where needed, add it to the same EH region as STMT_INFO's stmt. */
1453
1454 static void
1455 vect_finish_stmt_generation_1 (vec_info *,
1456 stmt_vec_info stmt_info, gimple *vec_stmt)
1457 {
1458 if (dump_enabled_p ())
1459 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1460
1461 if (stmt_info)
1462 {
1463 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1464
1465 /* While EH edges will generally prevent vectorization, stmt might
1466 e.g. be in a must-not-throw region. Ensure newly created stmts
1467 that could throw are part of the same region. */
1468 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1469 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1470 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1471 }
1472 else
1473 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1474 }
1475
1476 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1477 which sets the same scalar result as STMT_INFO did. The new statement
1478 inherits STMT_INFO's location and, where needed, its EH region. */
1479
1480 void
1481 vect_finish_replace_stmt (vec_info *vinfo,
1482 stmt_vec_info stmt_info, gimple *vec_stmt)
1483 {
1484 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1485 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1486
1487 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1488 gsi_replace (&gsi, vec_stmt, true);
1489
1490 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1491 }
1492
1493 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1494 before *GSI. The new statement inherits STMT_INFO's location and, where needed, its EH region. */
1495
1496 void
1497 vect_finish_stmt_generation (vec_info *vinfo,
1498 stmt_vec_info stmt_info, gimple *vec_stmt,
1499 gimple_stmt_iterator *gsi)
1500 {
1501 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1502
1503 if (!gsi_end_p (*gsi)
1504 && gimple_has_mem_ops (vec_stmt))
1505 {
1506 gimple *at_stmt = gsi_stmt (*gsi);
1507 tree vuse = gimple_vuse (at_stmt);
1508 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1509 {
1510 tree vdef = gimple_vdef (at_stmt);
1511 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1512 gimple_set_modified (vec_stmt, true);
1513 /* If we have an SSA vuse and insert a store, update virtual
1514 SSA form to avoid triggering the renamer. Do so only
1515 if we can easily see all uses - which is what almost always
1516 happens with the way vectorized stmts are inserted. */
1517 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1518 && ((is_gimple_assign (vec_stmt)
1519 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1520 || (is_gimple_call (vec_stmt)
1521 && (!(gimple_call_flags (vec_stmt)
1522 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1523 || (gimple_call_lhs (vec_stmt)
1524 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1525 {
1526 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1527 gimple_set_vdef (vec_stmt, new_vdef);
1528 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1529 }
1530 }
1531 }
1532 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1533 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1534 }
1535
1536 /* We want to vectorize a call to combined function CFN with function
1537 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1538 as the types of all inputs. Check whether this is possible using
1539 an internal function, returning its code if so or IFN_LAST if not. */
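
/* For example (editorial sketch, assuming the target provides a vector
   sqrt optab for VECTYPE_IN): a call to sqrt maps to IFN_SQRT, which is
   directly vectorizable, and direct_internal_fn_supported_p then decides
   whether the target supports it for the given vector types.  */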
1540
1541 static internal_fn
1542 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1543 tree vectype_out, tree vectype_in)
1544 {
1545 internal_fn ifn;
1546 if (internal_fn_p (cfn))
1547 ifn = as_internal_fn (cfn);
1548 else
1549 ifn = associated_internal_fn (fndecl);
1550 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1551 {
1552 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1553 if (info.vectorizable)
1554 {
1555 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1556 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1557 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1558 OPTIMIZE_FOR_SPEED))
1559 return ifn;
1560 }
1561 }
1562 return IFN_LAST;
1563 }
1564
1565
1566 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1567 gimple_stmt_iterator *);
1568
1569 /* Check whether a load or store statement in the loop described by
1570 LOOP_VINFO is possible in a loop using partial vectors. This is
1571 testing whether the vectorizer pass has the appropriate support,
1572 as well as whether the target does.
1573
1574 VLS_TYPE says whether the statement is a load or store and VECTYPE
1575 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1576 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1577 says how the load or store is going to be implemented and GROUP_SIZE
1578 is the number of load or store statements in the containing group.
1579 If the access is a gather load or scatter store, GS_INFO describes
1580 its arguments. If the load or store is conditional, SCALAR_MASK is the
1581 condition under which it occurs.
1582
1583 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1584 vectors is not supported, otherwise record the required rgroup control
1585 types. */
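
/* For example (editorial sketch): for a contiguous access on a target
   without len load/store but whose vector mode passes
   can_vec_mask_load_store_p, this records the required rgroup masks via
   vect_record_loop_mask; for an access it cannot handle with partial
   vectors (e.g. VMAT_ELEMENTWISE) it clears
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P instead.  */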
1586
1587 static void
1588 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1589 slp_tree slp_node,
1590 vec_load_store_type vls_type,
1591 int group_size,
1592 vect_memory_access_type
1593 memory_access_type,
1594 gather_scatter_info *gs_info,
1595 tree scalar_mask)
1596 {
1597 /* Invariant loads need no special support. */
1598 if (memory_access_type == VMAT_INVARIANT)
1599 return;
1600
1601 unsigned int nvectors;
1602 if (slp_node)
1603 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1604 else
1605 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1606
1607 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1608 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1609 machine_mode vecmode = TYPE_MODE (vectype);
1610 bool is_load = (vls_type == VLS_LOAD);
1611 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1612 {
1613 internal_fn ifn
1614 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1615 : vect_store_lanes_supported (vectype, group_size, true));
1616 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1617 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1618 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1619 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1620 scalar_mask);
1621 else
1622 {
1623 if (dump_enabled_p ())
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1625 "can't operate on partial vectors because"
1626 " the target doesn't have an appropriate"
1627 " load/store-lanes instruction.\n");
1628 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1629 }
1630 return;
1631 }
1632
1633 if (memory_access_type == VMAT_GATHER_SCATTER)
1634 {
1635 internal_fn ifn = (is_load
1636 ? IFN_MASK_GATHER_LOAD
1637 : IFN_MASK_SCATTER_STORE);
1638 internal_fn len_ifn = (is_load
1639 ? IFN_MASK_LEN_GATHER_LOAD
1640 : IFN_MASK_LEN_SCATTER_STORE);
1641 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1642 gs_info->memory_type,
1643 gs_info->offset_vectype,
1644 gs_info->scale))
1645 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1646 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1647 gs_info->memory_type,
1648 gs_info->offset_vectype,
1649 gs_info->scale))
1650 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1651 scalar_mask);
1652 else
1653 {
1654 if (dump_enabled_p ())
1655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1656 "can't operate on partial vectors because"
1657 " the target doesn't have an appropriate"
1658 " gather load or scatter store instruction.\n");
1659 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1660 }
1661 return;
1662 }
1663
1664 if (memory_access_type != VMAT_CONTIGUOUS
1665 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1666 {
1667 /* Element X of the data must come from iteration i * VF + X of the
1668 scalar loop. We need more work to support other mappings. */
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1671 "can't operate on partial vectors because an"
1672 " access isn't contiguous.\n");
1673 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1674 return;
1675 }
1676
1677 if (!VECTOR_MODE_P (vecmode))
1678 {
1679 if (dump_enabled_p ())
1680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1681 "can't operate on partial vectors when emulating"
1682 " vector operations.\n");
1683 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1684 return;
1685 }
1686
1687 /* We might load more scalars than we need for permuting SLP loads.
1688 We checked in get_group_load_store_type that the extra elements
1689 don't leak into a new vector. */
1690 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1691 {
1692 unsigned int nvectors;
1693 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1694 return nvectors;
1695 gcc_unreachable ();
1696 };
1697
1698 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1699 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1700 machine_mode mask_mode;
1701 machine_mode vmode;
1702 bool using_partial_vectors_p = false;
1703 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1704 {
1705 nvectors = group_memory_nvectors (group_size * vf, nunits);
1706 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1707 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1708 using_partial_vectors_p = true;
1709 }
1710 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1711 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1712 {
1713 nvectors = group_memory_nvectors (group_size * vf, nunits);
1714 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1715 using_partial_vectors_p = true;
1716 }
1717
1718 if (!using_partial_vectors_p)
1719 {
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1722 "can't operate on partial vectors because the"
1723 " target doesn't have the appropriate partial"
1724 " vectorization load or store.\n");
1725 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1726 }
1727 }
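
/* Purely illustrative numbers for the GROUP_MEMORY_NVECTORS lambda above,
not taken from any particular target: with GROUP_SIZE = 3, VF = 4 and a
4-element vector type it computes 12 / 4 = 3 masks or lengths to record;
with GROUP_SIZE = 3, VF = 2 it rounds 6 / 4 up to 2, which is where the
extra loaded elements mentioned in the comment come from.  */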
1728
1729 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1730 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1731 that needs to be applied to all loads and stores in a vectorized loop.
1732 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1733 otherwise return VEC_MASK & LOOP_MASK.
1734
1735 MASK_TYPE is the type of both masks. If new statements are needed,
1736 insert them before GSI. */
1737
1738 static tree
1739 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1740 tree vec_mask, gimple_stmt_iterator *gsi)
1741 {
1742 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1743 if (!loop_mask)
1744 return vec_mask;
1745
1746 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1747
1748 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1749 return vec_mask;
1750
1751 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1752 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1753 vec_mask, loop_mask);
1754
1755 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1756 return and_res;
1757 }
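
/* For illustration only (the SSA names are invented): when both masks are
present and the pair has not been recorded in vec_cond_masked_set, the
code above emits a statement along the lines of

vec_mask_and_6 = vec_mask_4 & loop_mask_5;

and returns the new name; otherwise VEC_MASK is returned unchanged.  */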
1758
1759 /* Determine whether we can use a gather load or scatter store to vectorize
1760 strided load or store STMT_INFO by truncating the current offset to a
1761 smaller width. We need to be able to construct an offset vector:
1762
1763 { 0, X, X*2, X*3, ... }
1764
1765 without loss of precision, where X is STMT_INFO's DR_STEP.
1766
1767 Return true if this is possible, describing the gather load or scatter
1768 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1769
1770 static bool
1771 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1772 loop_vec_info loop_vinfo, bool masked_p,
1773 gather_scatter_info *gs_info)
1774 {
1775 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1776 data_reference *dr = dr_info->dr;
1777 tree step = DR_STEP (dr);
1778 if (TREE_CODE (step) != INTEGER_CST)
1779 {
1780 /* ??? Perhaps we could use range information here? */
1781 if (dump_enabled_p ())
1782 dump_printf_loc (MSG_NOTE, vect_location,
1783 "cannot truncate variable step.\n");
1784 return false;
1785 }
1786
1787 /* Get the number of bits in an element. */
1788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1789 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1790 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1791
1792 /* Set COUNT to the upper limit on the number of elements - 1.
1793 Start with the maximum vectorization factor. */
1794 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1795
1796 /* Try lowering COUNT to the number of scalar latch iterations. */
1797 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1798 widest_int max_iters;
1799 if (max_loop_iterations (loop, &max_iters)
1800 && max_iters < count)
1801 count = max_iters.to_shwi ();
1802
1803 /* Try scales of 1 and the element size. */
1804 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1805 wi::overflow_type overflow = wi::OVF_NONE;
1806 for (int i = 0; i < 2; ++i)
1807 {
1808 int scale = scales[i];
1809 widest_int factor;
1810 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1811 continue;
1812
1813 /* Determine the minimum precision of COUNT * STEP / SCALE. */
1814 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1815 if (overflow)
1816 continue;
1817 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1818 unsigned int min_offset_bits = wi::min_precision (range, sign);
1819
1820 /* Find the narrowest viable offset type. */
1821 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1822 tree offset_type = build_nonstandard_integer_type (offset_bits,
1823 sign == UNSIGNED);
1824
1825 /* See whether the target supports the operation with an offset
1826 no narrower than OFFSET_TYPE. */
1827 tree memory_type = TREE_TYPE (DR_REF (dr));
1828 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1829 vectype, memory_type, offset_type, scale,
1830 &gs_info->ifn, &gs_info->offset_vectype)
1831 || gs_info->ifn == IFN_LAST)
1832 continue;
1833
1834 gs_info->decl = NULL_TREE;
1835 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1836 but we don't need to store that here. */
1837 gs_info->base = NULL_TREE;
1838 gs_info->element_type = TREE_TYPE (vectype);
1839 gs_info->offset = fold_convert (offset_type, step);
1840 gs_info->offset_dt = vect_constant_def;
1841 gs_info->scale = scale;
1842 gs_info->memory_type = memory_type;
1843 return true;
1844 }
1845
1846 if (overflow && dump_enabled_p ())
1847 dump_printf_loc (MSG_NOTE, vect_location,
1848 "truncating gather/scatter offset to %d bits"
1849 " might change its value.\n", element_bits);
1850
1851 return false;
1852 }
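
/* A made-up numeric example of the search above, not tied to any target:
with DR_STEP = 4, a 4-byte element and a loop known to run at most 255
latch iterations (with a larger maximum VF), COUNT becomes 255. The
scale-1 attempt gives FACTOR = 4 and RANGE = 1020, so a 16-bit unsigned
offset type is tried; the element-size scale of 4 gives FACTOR = 1 and
RANGE = 255, so an 8-bit unsigned offset type is tried. The first
combination the target supports is recorded in GS_INFO.  */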
1853
1854 /* Return true if we can use gather/scatter internal functions to
1855 vectorize STMT_INFO, which is a grouped or strided load or store.
1856 MASKED_P is true if the load or store is conditional. When returning
1857 true, fill in GS_INFO with the information required to perform the
1858 operation. */
1859
1860 static bool
1861 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1862 loop_vec_info loop_vinfo, bool masked_p,
1863 gather_scatter_info *gs_info)
1864 {
1865 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1866 || gs_info->ifn == IFN_LAST)
1867 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1868 masked_p, gs_info);
1869
1870 tree old_offset_type = TREE_TYPE (gs_info->offset);
1871 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1872
1873 gcc_assert (TYPE_PRECISION (new_offset_type)
1874 >= TYPE_PRECISION (old_offset_type));
1875 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1876
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location,
1879 "using gather/scatter for strided/grouped access,"
1880 " scale = %d\n", gs_info->scale);
1881
1882 return true;
1883 }
1884
1885 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1886 elements with a known constant step. Return -1 if that step
1887 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1888
1889 static int
1890 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1891 {
1892 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1893 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1894 size_zero_node);
1895 }
1896
1897 /* If the target supports a permute mask that reverses the elements in
1898 a vector of type VECTYPE, return that mask, otherwise return null. */
1899
1900 static tree
1901 perm_mask_for_reverse (tree vectype)
1902 {
1903 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1904
1905 /* The encoding has a single stepped pattern. */
1906 vec_perm_builder sel (nunits, 1, 3);
1907 for (int i = 0; i < 3; ++i)
1908 sel.quick_push (nunits - 1 - i);
1909
1910 vec_perm_indices indices (sel, 1, nunits);
1911 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1912 indices))
1913 return NULL_TREE;
1914 return vect_gen_perm_mask_checked (vectype, indices);
1915 }
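
/* For example (any vector length would do): for a 4-element vector the
three elements pushed above encode the selector { 3, 2, 1, 0 }; for
variable-length vectors the single stepped pattern extends the series
downwards implicitly.  */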
1916
1917 /* A subroutine of get_load_store_type, with a subset of the same
1918 arguments. Handle the case where STMT_INFO is a load or store that
1919 accesses consecutive elements with a negative step. Sets *POFFSET
1920 to the offset to be applied to the DR for the first access. */
1921
1922 static vect_memory_access_type
1923 get_negative_load_store_type (vec_info *vinfo,
1924 stmt_vec_info stmt_info, tree vectype,
1925 vec_load_store_type vls_type,
1926 unsigned int ncopies, poly_int64 *poffset)
1927 {
1928 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1929 dr_alignment_support alignment_support_scheme;
1930
1931 if (ncopies > 1)
1932 {
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "multiple types with negative step.\n");
1936 return VMAT_ELEMENTWISE;
1937 }
1938
1939 /* For backward running DRs the first access in vectype actually is
1940 N-1 elements before the address of the DR. */
1941 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1942 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1943
1944 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1945 alignment_support_scheme
1946 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1947 if (alignment_support_scheme != dr_aligned
1948 && alignment_support_scheme != dr_unaligned_supported)
1949 {
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "negative step but alignment required.\n");
1953 *poffset = 0;
1954 return VMAT_ELEMENTWISE;
1955 }
1956
1957 if (vls_type == VLS_STORE_INVARIANT)
1958 {
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_NOTE, vect_location,
1961 "negative step with invariant source;"
1962 " no permute needed.\n");
1963 return VMAT_CONTIGUOUS_DOWN;
1964 }
1965
1966 if (!perm_mask_for_reverse (vectype))
1967 {
1968 if (dump_enabled_p ())
1969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1970 "negative step and reversing not supported.\n");
1971 *poffset = 0;
1972 return VMAT_ELEMENTWISE;
1973 }
1974
1975 return VMAT_CONTIGUOUS_REVERSE;
1976 }
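
/* Worked example, with invented numbers: for a vector of four 4-byte
elements the offset computed above is (-4 + 1) * 4 = -12 bytes, i.e. the
vector access starts three elements before the DR's address; the loaded
or stored vector is then reversed (VMAT_CONTIGUOUS_REVERSE) unless the
stored value is invariant.  */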
1977
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1980
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1983 {
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1985 {
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1988 }
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1990 {
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1995 }
1996 gcc_unreachable ();
1997 }
1998
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2000
2001 This function returns a vector type which can be composed from NELTS pieces,
2002 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
2003 same vector size as the return vector. It first checks whether the target
2004 supports a piece-sized vector mode for the construction; if not, it then
2005 checks for a piece-sized scalar mode. It returns NULL_TREE if no suitable
2006 composition can be found.
2007
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2012
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2015 {
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2018
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2022
2023 /* When we are asked to compose the vector from its components, let
2024 that happen directly. */
2025 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
2026 {
2027 *ptype = TREE_TYPE (vtype);
2028 return vtype;
2029 }
2030
2031 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2032 unsigned int pbsize;
2033 if (constant_multiple_p (vbsize, nelts, &pbsize))
2034 {
2035 /* First check if vec_init optab supports construction from
2036 vector pieces directly. */
2037 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2038 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2039 machine_mode rmode;
2040 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2041 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2042 != CODE_FOR_nothing))
2043 {
2044 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2045 return vtype;
2046 }
2047
2048 /* Otherwise check whether an integer type of the same piece size exists
2049 and whether the vec_init optab supports construction from it directly. */
2050 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2051 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2052 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2053 != CODE_FOR_nothing))
2054 {
2055 *ptype = build_nonstandard_integer_type (pbsize, 1);
2056 return build_vector_type (*ptype, nelts);
2057 }
2058 }
2059
2060 return NULL_TREE;
2061 }
2062
2063 /* A subroutine of get_load_store_type, with a subset of the same
2064 arguments. Handle the case where STMT_INFO is part of a grouped load
2065 or store.
2066
2067 For stores, the statements in the group are all consecutive
2068 and there is no gap at the end. For loads, the statements in the
2069 group might not be consecutive; there can be gaps between statements
2070 as well as at the end. */
2071
2072 static bool
2073 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2074 tree vectype, slp_tree slp_node,
2075 bool masked_p, vec_load_store_type vls_type,
2076 vect_memory_access_type *memory_access_type,
2077 poly_int64 *poffset,
2078 dr_alignment_support *alignment_support_scheme,
2079 int *misalignment,
2080 gather_scatter_info *gs_info,
2081 internal_fn *lanes_ifn)
2082 {
2083 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2084 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2085 stmt_vec_info first_stmt_info;
2086 unsigned int group_size;
2087 unsigned HOST_WIDE_INT gap;
2088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2089 {
2090 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2091 group_size = DR_GROUP_SIZE (first_stmt_info);
2092 gap = DR_GROUP_GAP (first_stmt_info);
2093 }
2094 else
2095 {
2096 first_stmt_info = stmt_info;
2097 group_size = 1;
2098 gap = 0;
2099 }
2100 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2104
2105 /* True if the vectorized statements would access beyond the last
2106 statement in the group. */
2107 bool overrun_p = false;
2108
2109 /* True if we can cope with such overrun by peeling for gaps, so that
2110 there is at least one final scalar iteration after the vector loop. */
2111 bool can_overrun_p = (!masked_p
2112 && vls_type == VLS_LOAD
2113 && loop_vinfo
2114 && !loop->inner);
2115
2116 /* There can only be a gap at the end of the group if the stride is
2117 known at compile time. */
2118 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2119
2120 /* Stores can't yet have gaps. */
2121 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2122
2123 if (slp_node)
2124 {
2125 /* For SLP vectorization we directly vectorize a subchain
2126 without permutation. */
2127 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2128 first_dr_info
2129 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2130 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2131 {
2132 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2133 separated by the stride, until we have a complete vector.
2134 Fall back to scalar accesses if that isn't possible. */
2135 if (multiple_p (nunits, group_size))
2136 *memory_access_type = VMAT_STRIDED_SLP;
2137 else
2138 *memory_access_type = VMAT_ELEMENTWISE;
2139 }
2140 else
2141 {
2142 overrun_p = loop_vinfo && gap != 0;
2143 if (overrun_p && vls_type != VLS_LOAD)
2144 {
2145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 "Grouped store with gaps requires"
2147 " non-consecutive accesses\n");
2148 return false;
2149 }
2150 /* An overrun is fine if the trailing elements are smaller
2151 than the alignment boundary B. Every vector access will
2152 be a multiple of B and so we are guaranteed to access a
2153 non-gap element in the same B-sized block. */
2154 if (overrun_p
2155 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2156 vectype)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
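
/* Illustrative numbers for the test above: with a known alignment of 16
bytes and 4-byte scalars, a trailing gap of up to 3 elements stays within
the same 16-byte block as the last real element, so the overrun can be
ignored; a gap of 4 or more cannot.  */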
2159
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alss;
2165 int misalign = dr_misalignment (first_dr_info, vectype);
2166 tree half_vtype;
2167 if (overrun_p
2168 && !masked_p
2169 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2170 vectype, misalign)))
2171 == dr_aligned
2172 || alss == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2178
2179 if (overrun_p && !can_overrun_p)
2180 {
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2185 }
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2188 {
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2194 else
2195 {
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2203 }
2204 }
2205 else if (cmp == 0 && loop_vinfo)
2206 {
2207 gcc_assert (vls_type == VLS_LOAD);
2208 *memory_access_type = VMAT_INVARIANT;
2209 /* Invariant accesses perform only component accesses, alignment
2210 is irrelevant for them. */
2211 *alignment_support_scheme = dr_unaligned_supported;
2212 }
2213 else
2214 *memory_access_type = VMAT_CONTIGUOUS;
2215
2216 /* When we have a contiguous access across loop iterations
2217 but the access in the loop doesn't cover the full vector
2218 we can end up with no gap recorded but still excess
2219 elements accessed, see PR103116. Make sure we peel for
2220 gaps if necessary and sufficient and give up if not.
2221
2222 If there is a combination of the access not covering the full
2223 vector and a gap recorded then we may need to peel twice. */
2224 if (loop_vinfo
2225 && *memory_access_type == VMAT_CONTIGUOUS
2226 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2227 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2228 nunits))
2229 {
2230 unsigned HOST_WIDE_INT cnunits, cvf;
2231 if (!can_overrun_p
2232 || !nunits.is_constant (&cnunits)
2233 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2234 /* Peeling for gaps assumes that a single scalar iteration
2235 is enough to make sure the last vector iteration doesn't
2236 access excess elements.
2237 ??? Enhancements include peeling multiple iterations
2238 or using masked loads with a static mask. */
2239 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2240 {
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "peeling for gaps insufficient for "
2244 "access\n");
2245 return false;
2246 }
2247 overrun_p = true;
2248 }
2249 }
2250 }
2251 else
2252 {
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type = VMAT_ELEMENTWISE;
2256
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p = (gap != 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2264 if (would_overrun_p
2265 && !masked_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2267 / vect_get_scalar_dr_size (first_dr_info)))
2268 would_overrun_p = false;
2269
2270 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2271 && (can_overrun_p || !would_overrun_p)
2272 && compare_step_with_zero (vinfo, stmt_info) > 0)
2273 {
2274 /* First cope with the degenerate case of a single-element
2275 vector. */
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2277 ;
2278
2279 else
2280 {
2281 /* Otherwise try using LOAD/STORE_LANES. */
2282 *lanes_ifn
2283 = vls_type == VLS_LOAD
2284 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2285 : vect_store_lanes_supported (vectype, group_size,
2286 masked_p);
2287 if (*lanes_ifn != IFN_LAST)
2288 {
2289 *memory_access_type = VMAT_LOAD_STORE_LANES;
2290 overrun_p = would_overrun_p;
2291 }
2292
2293 /* If that fails, try using permuting loads. */
2294 else if (vls_type == VLS_LOAD
2295 ? vect_grouped_load_supported (vectype,
2296 single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2299 {
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2302 }
2303 }
2304 }
2305
2306 /* As a last resort, try using a gather load or scatter store.
2307
2308 ??? Although the code can handle all group sizes correctly,
2309 it probably isn't a win to use separate strided accesses based
2310 on nearby locations. Or, even if it's a win over scalar code,
2311 it might not be a win over vectorizing at a lower VF, if that
2312 allows us to use contiguous accesses. */
2313 if (*memory_access_type == VMAT_ELEMENTWISE
2314 && single_element_p
2315 && loop_vinfo
2316 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2317 masked_p, gs_info))
2318 *memory_access_type = VMAT_GATHER_SCATTER;
2319 }
2320
2321 if (*memory_access_type == VMAT_GATHER_SCATTER
2322 || *memory_access_type == VMAT_ELEMENTWISE)
2323 {
2324 *alignment_support_scheme = dr_unaligned_supported;
2325 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2326 }
2327 else
2328 {
2329 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2330 *alignment_support_scheme
2331 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2332 *misalignment);
2333 }
2334
2335 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2336 {
2337 /* STMT is the leader of the group. Check the operands of all the
2338 stmts of the group. */
2339 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2340 while (next_stmt_info)
2341 {
2342 tree op = vect_get_store_rhs (next_stmt_info);
2343 enum vect_def_type dt;
2344 if (!vect_is_simple_use (op, vinfo, &dt))
2345 {
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "use not simple.\n");
2349 return false;
2350 }
2351 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2352 }
2353 }
2354
2355 if (overrun_p)
2356 {
2357 gcc_assert (can_overrun_p);
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "Data access with gaps requires scalar "
2361 "epilogue loop\n");
2362 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2363 }
2364
2365 return true;
2366 }
2367
2368 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2369 if there is a memory access type that the vectorized form can use,
2370 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2371 or scatters, fill in GS_INFO accordingly. In addition
2372 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2373 the target does not support the alignment scheme. *MISALIGNMENT
2374 is set according to the alignment of the access (including
2375 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2376
2377 SLP_NODE says whether we're performing SLP rather than loop vectorization.
2378 MASKED_P is true if the statement is conditional on a vectorized mask.
2379 VECTYPE is the vector type that the vectorized statements will use.
2380 NCOPIES is the number of vector statements that will be needed. */
2381
2382 static bool
2383 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2384 tree vectype, slp_tree slp_node,
2385 bool masked_p, vec_load_store_type vls_type,
2386 unsigned int ncopies,
2387 vect_memory_access_type *memory_access_type,
2388 poly_int64 *poffset,
2389 dr_alignment_support *alignment_support_scheme,
2390 int *misalignment,
2391 gather_scatter_info *gs_info,
2392 internal_fn *lanes_ifn)
2393 {
2394 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2395 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2396 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2397 *poffset = 0;
2398 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2399 {
2400 *memory_access_type = VMAT_GATHER_SCATTER;
2401 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2402 gcc_unreachable ();
2403 /* When using internal functions, we rely on pattern recognition
2404 to convert the type of the offset to the type that the target
2405 requires, with the result being a call to an internal function.
2406 If that failed for some reason (e.g. because another pattern
2407 took priority), just handle cases in which the offset already
2408 has the right type. */
2409 else if (gs_info->ifn != IFN_LAST
2410 && !is_gimple_call (stmt_info->stmt)
2411 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2412 TREE_TYPE (gs_info->offset_vectype)))
2413 {
2414 if (dump_enabled_p ())
2415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2416 "%s offset requires a conversion\n",
2417 vls_type == VLS_LOAD ? "gather" : "scatter");
2418 return false;
2419 }
2420 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2421 &gs_info->offset_dt,
2422 &gs_info->offset_vectype))
2423 {
2424 if (dump_enabled_p ())
2425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2426 "%s index use not simple.\n",
2427 vls_type == VLS_LOAD ? "gather" : "scatter");
2428 return false;
2429 }
2430 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2431 {
2432 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2433 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2434 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2435 (gs_info->offset_vectype),
2436 TYPE_VECTOR_SUBPARTS (vectype)))
2437 {
2438 if (dump_enabled_p ())
2439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2440 "unsupported vector types for emulated "
2441 "gather.\n");
2442 return false;
2443 }
2444 }
2445 /* Gather-scatter accesses perform only component accesses, alignment
2446 is irrelevant for them. */
2447 *alignment_support_scheme = dr_unaligned_supported;
2448 }
2449 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2450 {
2451 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2452 masked_p,
2453 vls_type, memory_access_type, poffset,
2454 alignment_support_scheme,
2455 misalignment, gs_info, lanes_ifn))
2456 return false;
2457 }
2458 else if (STMT_VINFO_STRIDED_P (stmt_info))
2459 {
2460 gcc_assert (!slp_node);
2461 if (loop_vinfo
2462 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2463 masked_p, gs_info))
2464 *memory_access_type = VMAT_GATHER_SCATTER;
2465 else
2466 *memory_access_type = VMAT_ELEMENTWISE;
2467 /* Alignment is irrelevant here. */
2468 *alignment_support_scheme = dr_unaligned_supported;
2469 }
2470 else
2471 {
2472 int cmp = compare_step_with_zero (vinfo, stmt_info);
2473 if (cmp == 0)
2474 {
2475 gcc_assert (vls_type == VLS_LOAD);
2476 *memory_access_type = VMAT_INVARIANT;
2477 /* Invariant accesses perform only component accesses, alignment
2478 is irrelevant for them. */
2479 *alignment_support_scheme = dr_unaligned_supported;
2480 }
2481 else
2482 {
2483 if (cmp < 0)
2484 *memory_access_type = get_negative_load_store_type
2485 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2486 else
2487 *memory_access_type = VMAT_CONTIGUOUS;
2488 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2489 vectype, *poffset);
2490 *alignment_support_scheme
2491 = vect_supportable_dr_alignment (vinfo,
2492 STMT_VINFO_DR_INFO (stmt_info),
2493 vectype, *misalignment);
2494 }
2495 }
2496
2497 if ((*memory_access_type == VMAT_ELEMENTWISE
2498 || *memory_access_type == VMAT_STRIDED_SLP)
2499 && !nunits.is_constant ())
2500 {
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "Not using elementwise accesses due to variable "
2504 "vectorization factor.\n");
2505 return false;
2506 }
2507
2508 if (*alignment_support_scheme == dr_unaligned_unsupported)
2509 {
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "unsupported unaligned access\n");
2513 return false;
2514 }
2515
2516 /* FIXME: At the moment the cost model seems to underestimate the
2517 cost of using elementwise accesses. This check preserves the
2518 traditional behavior until that can be fixed. */
2519 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2520 if (!first_stmt_info)
2521 first_stmt_info = stmt_info;
2522 if (*memory_access_type == VMAT_ELEMENTWISE
2523 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2524 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2525 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2526 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2527 {
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "not falling back to elementwise accesses\n");
2531 return false;
2532 }
2533 return true;
2534 }
2535
2536 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2537 conditional operation STMT_INFO. When returning true, store the mask
2538 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2539 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2540 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2541
2542 static bool
2543 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2544 slp_tree slp_node, unsigned mask_index,
2545 tree *mask, slp_tree *mask_node,
2546 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2547 {
2548 enum vect_def_type mask_dt;
2549 tree mask_vectype;
2550 slp_tree mask_node_1;
2551 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2552 mask, &mask_node_1, &mask_dt, &mask_vectype))
2553 {
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask use not simple.\n");
2557 return false;
2558 }
2559
2560 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2561 {
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "mask argument is not a boolean.\n");
2565 return false;
2566 }
2567
2568 /* If the caller is not prepared to adjust an external/constant
2569 SLP mask vector type, fail. */
2570 if (slp_node
2571 && !mask_node
2572 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2573 {
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "SLP mask argument is not vectorized.\n");
2577 return false;
2578 }
2579
2580 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2581 if (!mask_vectype)
2582 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2583
2584 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2585 {
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "could not find an appropriate vector mask type.\n");
2589 return false;
2590 }
2591
2592 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2593 TYPE_VECTOR_SUBPARTS (vectype)))
2594 {
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 "vector mask type %T"
2598 " does not match vector data type %T.\n",
2599 mask_vectype, vectype);
2600
2601 return false;
2602 }
2603
2604 *mask_dt_out = mask_dt;
2605 *mask_vectype_out = mask_vectype;
2606 if (mask_node)
2607 *mask_node = mask_node_1;
2608 return true;
2609 }
2610
2611 /* Return true if stored value RHS is suitable for vectorizing store
2612 statement STMT_INFO. When returning true, store the type of the
2613 definition in *RHS_DT_OUT, the type of the vectorized store value in
2614 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2615
2616 static bool
2617 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2618 slp_tree slp_node, tree rhs,
2619 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2620 vec_load_store_type *vls_type_out)
2621 {
2622 /* In case this is a store from a constant, make sure
2623 native_encode_expr can handle it. */
2624 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2625 {
2626 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2628 "cannot encode constant as a byte sequence.\n");
2629 return false;
2630 }
2631
2632 int op_no = 0;
2633 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2634 {
2635 if (gimple_call_internal_p (call)
2636 && internal_store_fn_p (gimple_call_internal_fn (call)))
2637 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2638 if (slp_node)
2639 op_no = vect_slp_child_index_for_operand (call, op_no);
2640 }
2641
2642 enum vect_def_type rhs_dt;
2643 tree rhs_vectype;
2644 slp_tree slp_op;
2645 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2646 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2647 {
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 "use not simple.\n");
2651 return false;
2652 }
2653
2654 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2655 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2656 {
2657 if (dump_enabled_p ())
2658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2659 "incompatible vector types.\n");
2660 return false;
2661 }
2662
2663 *rhs_dt_out = rhs_dt;
2664 *rhs_vectype_out = rhs_vectype;
2665 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2666 *vls_type_out = VLS_STORE_INVARIANT;
2667 else
2668 *vls_type_out = VLS_STORE;
2669 return true;
2670 }
2671
2672 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2673 Note that we support masks with floating-point type, in which case the
2674 floats are interpreted as a bitmask. */
2675
2676 static tree
2677 vect_build_all_ones_mask (vec_info *vinfo,
2678 stmt_vec_info stmt_info, tree masktype)
2679 {
2680 if (TREE_CODE (masktype) == INTEGER_TYPE)
2681 return build_int_cst (masktype, -1);
2682 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2683 {
2684 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2685 mask = build_vector_from_val (masktype, mask);
2686 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2687 }
2688 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2689 {
2690 REAL_VALUE_TYPE r;
2691 long tmp[6];
2692 for (int j = 0; j < 6; ++j)
2693 tmp[j] = -1;
2694 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2695 tree mask = build_real (TREE_TYPE (masktype), r);
2696 mask = build_vector_from_val (masktype, mask);
2697 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2698 }
2699 gcc_unreachable ();
2700 }
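
/* Note on the float case above: the target representation is filled with
all-one bits and reinterpreted as a real constant before broadcasting;
for common IEEE formats the resulting value is a NaN, but only its bit
pattern matters, as the function comment explains.  */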
2701
2702 /* Build an all-zero merge value of type VECTYPE while vectorizing
2703 STMT_INFO as a gather load. */
2704
2705 static tree
2706 vect_build_zero_merge_argument (vec_info *vinfo,
2707 stmt_vec_info stmt_info, tree vectype)
2708 {
2709 tree merge;
2710 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2711 merge = build_int_cst (TREE_TYPE (vectype), 0);
2712 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2713 {
2714 REAL_VALUE_TYPE r;
2715 long tmp[6];
2716 for (int j = 0; j < 6; ++j)
2717 tmp[j] = 0;
2718 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2719 merge = build_real (TREE_TYPE (vectype), r);
2720 }
2721 else
2722 gcc_unreachable ();
2723 merge = build_vector_from_val (vectype, merge);
2724 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2725 }
2726
2727 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2728 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2729 the gather load operation. If the load is conditional, MASK is the
2730 unvectorized scalar condition, otherwise MASK is null. If COST_VEC is
2731 nonnull, only record costs and generate no code. */
2732
2733 static void
2734 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2735 gimple_stmt_iterator *gsi,
2736 gimple **vec_stmt,
2737 gather_scatter_info *gs_info,
2738 tree mask,
2739 stmt_vector_for_cost *cost_vec)
2740 {
2741 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2742 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2743 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2744 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2745 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2746 edge pe = loop_preheader_edge (loop);
2747 enum { NARROW, NONE, WIDEN } modifier;
2748 poly_uint64 gather_off_nunits
2749 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2750
2751 /* FIXME: This keeps the previous costing scheme of vect_model_load_cost,
2752 costing N scalar loads; it should be tweaked to use target-specific costs
2753 for the related gather load calls. */
2754 if (cost_vec)
2755 {
2756 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2757 unsigned int inside_cost;
2758 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2759 scalar_load, stmt_info, 0, vect_body);
2760 if (dump_enabled_p ())
2761 dump_printf_loc (MSG_NOTE, vect_location,
2762 "vect_model_load_cost: inside_cost = %d, "
2763 "prologue_cost = 0 .\n",
2764 inside_cost);
2765 return;
2766 }
2767
2768 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2769 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2770 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2771 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2772 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2773 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2774 tree scaletype = TREE_VALUE (arglist);
2775 tree real_masktype = masktype;
2776 gcc_checking_assert (types_compatible_p (srctype, rettype)
2777 && (!mask
2778 || TREE_CODE (masktype) == INTEGER_TYPE
2779 || types_compatible_p (srctype, masktype)));
2780 if (mask)
2781 masktype = truth_type_for (srctype);
2782
2783 tree mask_halftype = masktype;
2784 tree perm_mask = NULL_TREE;
2785 tree mask_perm_mask = NULL_TREE;
2786 if (known_eq (nunits, gather_off_nunits))
2787 modifier = NONE;
2788 else if (known_eq (nunits * 2, gather_off_nunits))
2789 {
2790 modifier = WIDEN;
2791
2792 /* Currently widening gathers and scatters are only supported for
2793 fixed-length vectors. */
2794 int count = gather_off_nunits.to_constant ();
2795 vec_perm_builder sel (count, count, 1);
2796 for (int i = 0; i < count; ++i)
2797 sel.quick_push (i | (count / 2));
2798
2799 vec_perm_indices indices (sel, 1, count);
2800 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2801 indices);
2802 }
2803 else if (known_eq (nunits, gather_off_nunits * 2))
2804 {
2805 modifier = NARROW;
2806
2807 /* Currently narrowing gathers and scatters are only supported for
2808 fixed-length vectors. */
2809 int count = nunits.to_constant ();
2810 vec_perm_builder sel (count, count, 1);
2811 sel.quick_grow (count);
2812 for (int i = 0; i < count; ++i)
2813 sel[i] = i < count / 2 ? i : i + count / 2;
2814 vec_perm_indices indices (sel, 2, count);
2815 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2816
2817 ncopies *= 2;
2818
2819 if (mask && VECTOR_TYPE_P (real_masktype))
2820 {
2821 for (int i = 0; i < count; ++i)
2822 sel[i] = i | (count / 2);
2823 indices.new_vector (sel, 2, count);
2824 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2825 }
2826 else if (mask)
2827 mask_halftype = truth_type_for (gs_info->offset_vectype);
2828 }
2829 else
2830 gcc_unreachable ();
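
/* Example counts, chosen only for illustration: with a 4-element data
vector and an 8-element offset vector the chain above picks WIDEN and
builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 }, so odd copies reuse the
upper half of the offset vector; with an 8-element data vector and a
4-element offset vector it picks NARROW, doubles NCOPIES, and pairs of
gather results are later combined using PERM_MASK.  */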
2831
2832 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2833 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2834
2835 tree ptr = fold_convert (ptrtype, gs_info->base);
2836 if (!is_gimple_min_invariant (ptr))
2837 {
2838 gimple_seq seq;
2839 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2840 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2841 gcc_assert (!new_bb);
2842 }
2843
2844 tree scale = build_int_cst (scaletype, gs_info->scale);
2845
2846 tree vec_oprnd0 = NULL_TREE;
2847 tree vec_mask = NULL_TREE;
2848 tree src_op = NULL_TREE;
2849 tree mask_op = NULL_TREE;
2850 tree prev_res = NULL_TREE;
2851
2852 if (!mask)
2853 {
2854 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2855 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2856 }
2857
2858 auto_vec<tree> vec_oprnds0;
2859 auto_vec<tree> vec_masks;
2860 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2861 modifier == WIDEN ? ncopies / 2 : ncopies,
2862 gs_info->offset, &vec_oprnds0);
2863 if (mask)
2864 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2865 modifier == NARROW ? ncopies / 2 : ncopies,
2866 mask, &vec_masks, masktype);
2867 for (int j = 0; j < ncopies; ++j)
2868 {
2869 tree op, var;
2870 if (modifier == WIDEN && (j & 1))
2871 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2872 perm_mask, stmt_info, gsi);
2873 else
2874 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2875
2876 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2877 {
2878 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2879 TYPE_VECTOR_SUBPARTS (idxtype)));
2880 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2881 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2882 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2883 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2884 op = var;
2885 }
2886
2887 if (mask)
2888 {
2889 if (mask_perm_mask && (j & 1))
2890 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2891 mask_perm_mask, stmt_info, gsi);
2892 else
2893 {
2894 if (modifier == NARROW)
2895 {
2896 if ((j & 1) == 0)
2897 vec_mask = vec_masks[j / 2];
2898 }
2899 else
2900 vec_mask = vec_masks[j];
2901
2902 mask_op = vec_mask;
2903 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2904 {
2905 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2906 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2907 gcc_assert (known_eq (sub1, sub2));
2908 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2909 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2910 gassign *new_stmt
2911 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2912 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2913 mask_op = var;
2914 }
2915 }
2916 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2917 {
2918 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2919 gassign *new_stmt
2920 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2921 : VEC_UNPACK_LO_EXPR,
2922 mask_op);
2923 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2924 mask_op = var;
2925 }
2926 src_op = mask_op;
2927 }
2928
2929 tree mask_arg = mask_op;
2930 if (masktype != real_masktype)
2931 {
2932 tree utype, optype = TREE_TYPE (mask_op);
2933 if (VECTOR_TYPE_P (real_masktype)
2934 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2935 utype = real_masktype;
2936 else
2937 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2938 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2939 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2940 gassign *new_stmt
2941 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2942 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2943 mask_arg = var;
2944 if (!useless_type_conversion_p (real_masktype, utype))
2945 {
2946 gcc_assert (TYPE_PRECISION (utype)
2947 <= TYPE_PRECISION (real_masktype));
2948 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2949 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2950 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2951 mask_arg = var;
2952 }
2953 src_op = build_zero_cst (srctype);
2954 }
2955 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2956 mask_arg, scale);
2957
2958 if (!useless_type_conversion_p (vectype, rettype))
2959 {
2960 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2961 TYPE_VECTOR_SUBPARTS (rettype)));
2962 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2963 gimple_call_set_lhs (new_stmt, op);
2964 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2965 var = make_ssa_name (vec_dest);
2966 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2967 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2969 }
2970 else
2971 {
2972 var = make_ssa_name (vec_dest, new_stmt);
2973 gimple_call_set_lhs (new_stmt, var);
2974 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2975 }
2976
2977 if (modifier == NARROW)
2978 {
2979 if ((j & 1) == 0)
2980 {
2981 prev_res = var;
2982 continue;
2983 }
2984 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2985 stmt_info, gsi);
2986 new_stmt = SSA_NAME_DEF_STMT (var);
2987 }
2988
2989 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2990 }
2991 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2992 }
2993
2994 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2995 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2996 the scatter store operation. If the store is conditional, MASK is the
2997 unvectorized condition, otherwise MASK is null. */
2998
2999 static void
3000 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
3001 gimple_stmt_iterator *gsi, gimple **vec_stmt,
3002 gather_scatter_info *gs_info, tree mask)
3003 {
3004 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3005 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3006 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3007 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
3008 enum { NARROW, NONE, WIDEN } modifier;
3009 poly_uint64 scatter_off_nunits
3010 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
3011
3012 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
3013 if (known_eq (nunits, scatter_off_nunits))
3014 modifier = NONE;
3015 else if (known_eq (nunits * 2, scatter_off_nunits))
3016 {
3017 modifier = WIDEN;
3018
3019 /* Currently gathers and scatters are only supported for
3020 fixed-length vectors. */
3021 unsigned int count = scatter_off_nunits.to_constant ();
3022 vec_perm_builder sel (count, count, 1);
3023 for (unsigned i = 0; i < (unsigned int) count; ++i)
3024 sel.quick_push (i | (count / 2));
3025
3026 vec_perm_indices indices (sel, 1, count);
3027 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
3028 gcc_assert (perm_mask != NULL_TREE);
3029 }
3030 else if (known_eq (nunits, scatter_off_nunits * 2))
3031 {
3032 modifier = NARROW;
3033
3034 /* Currently gathers and scatters are only supported for
3035 fixed-length vectors. */
3036 unsigned int count = nunits.to_constant ();
3037 vec_perm_builder sel (count, count, 1);
3038 for (unsigned i = 0; i < (unsigned int) count; ++i)
3039 sel.quick_push (i | (count / 2));
3040
3041 vec_perm_indices indices (sel, 2, count);
3042 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3043 gcc_assert (perm_mask != NULL_TREE);
3044 ncopies *= 2;
3045
3046 if (mask)
3047 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
3048 }
3049 else
3050 gcc_unreachable ();
3051
3052 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
3053 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
3054 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3055 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3056 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3057 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3058 tree scaletype = TREE_VALUE (arglist);
3059
3060 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
3061 && TREE_CODE (rettype) == VOID_TYPE);
3062
3063 tree ptr = fold_convert (ptrtype, gs_info->base);
3064 if (!is_gimple_min_invariant (ptr))
3065 {
3066 gimple_seq seq;
3067 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
3068 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3069 edge pe = loop_preheader_edge (loop);
3070 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
3071 gcc_assert (!new_bb);
3072 }
3073
3074 tree mask_arg = NULL_TREE;
3075 if (mask == NULL_TREE)
3076 {
3077 mask_arg = build_int_cst (masktype, -1);
3078 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
3079 }
3080
3081 tree scale = build_int_cst (scaletype, gs_info->scale);
3082
3083 auto_vec<tree> vec_oprnds0;
3084 auto_vec<tree> vec_oprnds1;
3085 auto_vec<tree> vec_masks;
3086 if (mask)
3087 {
3088 tree mask_vectype = truth_type_for (vectype);
3089 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3090 modifier == NARROW ? ncopies / 2 : ncopies,
3091 mask, &vec_masks, mask_vectype);
3092 }
3093 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3094 modifier == WIDEN ? ncopies / 2 : ncopies,
3095 gs_info->offset, &vec_oprnds0);
3096 tree op = vect_get_store_rhs (stmt_info);
3097 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3098 modifier == NARROW ? ncopies / 2 : ncopies, op,
3099 &vec_oprnds1);
3100
3101 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3102 tree mask_op = NULL_TREE;
3103 tree src, vec_mask;
3104 for (int j = 0; j < ncopies; ++j)
3105 {
3106 if (modifier == WIDEN)
3107 {
3108 if (j & 1)
3109 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
3110 stmt_info, gsi);
3111 else
3112 op = vec_oprnd0 = vec_oprnds0[j / 2];
3113 src = vec_oprnd1 = vec_oprnds1[j];
3114 if (mask)
3115 mask_op = vec_mask = vec_masks[j];
3116 }
3117 else if (modifier == NARROW)
3118 {
3119 if (j & 1)
3120 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
3121 perm_mask, stmt_info, gsi);
3122 else
3123 src = vec_oprnd1 = vec_oprnds1[j / 2];
3124 op = vec_oprnd0 = vec_oprnds0[j];
3125 if (mask)
3126 mask_op = vec_mask = vec_masks[j / 2];
3127 }
3128 else
3129 {
3130 op = vec_oprnd0 = vec_oprnds0[j];
3131 src = vec_oprnd1 = vec_oprnds1[j];
3132 if (mask)
3133 mask_op = vec_mask = vec_masks[j];
3134 }
3135
3136 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
3137 {
3138 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
3139 TYPE_VECTOR_SUBPARTS (srctype)));
3140 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
3141 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
3142 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3143 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3144 src = var;
3145 }
3146
3147 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
3148 {
3149 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
3150 TYPE_VECTOR_SUBPARTS (idxtype)));
3151 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3152 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
3153 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3154 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3155 op = var;
3156 }
3157
3158 if (mask)
3159 {
3160 tree utype;
3161 mask_arg = mask_op;
3162 if (modifier == NARROW)
3163 {
3164 tree var
3165 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
3166 gassign *new_stmt
3167 = gimple_build_assign (var,
3168 (j & 1) ? VEC_UNPACK_HI_EXPR
3169 : VEC_UNPACK_LO_EXPR,
3170 mask_op);
3171 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3172 mask_arg = var;
3173 }
3174 tree optype = TREE_TYPE (mask_arg);
3175 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
3176 utype = masktype;
3177 else
3178 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3179 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
3180 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
3181 gassign *new_stmt
3182 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3183 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3184 mask_arg = var;
3185 if (!useless_type_conversion_p (masktype, utype))
3186 {
3187 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
3188 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3189 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3190 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3191 mask_arg = var;
3192 }
3193 }
3194
3195 gcall *new_stmt
3196 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
3197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3198
3199 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3200 }
3201 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3202 }
3203
3204 /* Prepare the base and offset in GS_INFO for vectorization.
3205 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3206 to the vectorized offset argument for the first copy of STMT_INFO.
3207 STMT_INFO is the statement described by GS_INFO and LOOP is the
3208 containing loop. */
3209
3210 static void
3211 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3212 class loop *loop, stmt_vec_info stmt_info,
3213 slp_tree slp_node, gather_scatter_info *gs_info,
3214 tree *dataref_ptr, vec<tree> *vec_offset)
3215 {
3216 gimple_seq stmts = NULL;
3217 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3218 if (stmts != NULL)
3219 {
3220 basic_block new_bb;
3221 edge pe = loop_preheader_edge (loop);
3222 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3223 gcc_assert (!new_bb);
3224 }
3225 if (slp_node)
3226 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3227 else
3228 {
3229 unsigned ncopies
3230 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3231 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3232 gs_info->offset, vec_offset,
3233 gs_info->offset_vectype);
3234 }
3235 }
3236
3237 /* Prepare to implement a grouped or strided load or store using
3238 the gather load or scatter store operation described by GS_INFO.
3239 STMT_INFO is the load or store statement.
3240
3241 Set *DATAREF_BUMP to the amount that should be added to the base
3242 address after each copy of the vectorized statement. Set *VEC_OFFSET
3243 to an invariant offset vector in which element I has the value
3244 I * DR_STEP / SCALE. */
3245
3246 static void
3247 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3248 loop_vec_info loop_vinfo,
3249 gimple_stmt_iterator *gsi,
3250 gather_scatter_info *gs_info,
3251 tree *dataref_bump, tree *vec_offset,
3252 vec_loop_lens *loop_lens)
3253 {
3254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3255 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3256
3257 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3258 {
3259 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3260 ivtmp_8 = _31 * 16 (step in bytes);
3261 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3262 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3263 tree loop_len
3264 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3265 tree tmp
3266 = fold_build2 (MULT_EXPR, sizetype,
3267 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3268 loop_len);
3269 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3270 GSI_SAME_STMT);
3271 }
3272 else
3273 {
3274 tree bump
3275 = size_binop (MULT_EXPR,
3276 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3277 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3278 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3279 }
3280
3281 /* The offset given in GS_INFO can have pointer type, so use the element
3282 type of the vector instead. */
3283 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3284
3285 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3286 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3287 ssize_int (gs_info->scale));
3288 step = fold_convert (offset_type, step);
3289
3290 /* Create {0, X, X*2, X*3, ...}. */
3291 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3292 build_zero_cst (offset_type), step);
3293 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3294 }
3295
3296 /* Prepare the pointer IVs which need to be updated by a variable amount.
3297 That variable amount is the outcome of .SELECT_VL. In this case, we can
3298 allow each iteration to process a flexible number of elements, as long
3299 as that number is <= vf elements.
3300
3301 Return the data reference increment according to .SELECT_VL.
3302 If new statements are needed, insert them before GSI. */
3303
3304 static tree
3305 vect_get_loop_variant_data_ptr_increment (
3306 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3307 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3308 vect_memory_access_type memory_access_type)
3309 {
3310 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3311 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3312
3313 /* gather/scatter never reach here. */
3314 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3315
3316 /* When we use the SELECT_VL pattern, we dynamically adjust
3317 the memory address by the .SELECT_VL result.
3318
3319 The result of .SELECT_VL is the number of elements to
3320 be processed in each iteration. So the memory address
3321 adjustment operation should be:
3322
3323 addr = addr + .SELECT_VL (ARG..) * step;
3324 */
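/* For example (illustrative): with a step of 4 bytes, an iteration in
which .SELECT_VL returns 5 bumps the pointer by 20 bytes. */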
3325 tree loop_len
3326 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3327 tree len_type = TREE_TYPE (loop_len);
3328 /* Since the outcome of .SELECT_VL is a number of elements, we have to
3329 scale it by the step in bytes so that it can be used to adjust the
3330 variable-amount address pointer IVs. */
3331 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3332 wide_int_to_tree (len_type, wi::to_widest (step)));
3333 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3334 gassign *assign = gimple_build_assign (bump, tmp);
3335 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3336 return bump;
3337 }
3338
3339 /* Return the amount that should be added to a vector pointer to move
3340 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3341 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3342 vectorization. */
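/* For example (illustrative): if AGGR_TYPE is V4SI the increment is 16
bytes, negated to -16 when DR_STEP is negative, and zero for an
invariant access. */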
3343
3344 static tree
3345 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3346 dr_vec_info *dr_info, tree aggr_type,
3347 vect_memory_access_type memory_access_type,
3348 vec_loop_lens *loop_lens = nullptr)
3349 {
3350 if (memory_access_type == VMAT_INVARIANT)
3351 return size_zero_node;
3352
3353 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3354 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3355 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3356 loop_lens, dr_info,
3357 memory_access_type);
3358
3359 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3360 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3361 if (tree_int_cst_sgn (step) == -1)
3362 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3363 return iv_step;
3364 }
3365
3366 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3367
3368 static bool
3369 vectorizable_bswap (vec_info *vinfo,
3370 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3371 gimple **vec_stmt, slp_tree slp_node,
3372 slp_tree *slp_op,
3373 tree vectype_in, stmt_vector_for_cost *cost_vec)
3374 {
3375 tree op, vectype;
3376 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3377 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3378 unsigned ncopies;
3379
3380 op = gimple_call_arg (stmt, 0);
3381 vectype = STMT_VINFO_VECTYPE (stmt_info);
3382 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3383
3384 /* Multiple types in SLP are handled by creating the appropriate number of
3385 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3386 case of SLP. */
3387 if (slp_node)
3388 ncopies = 1;
3389 else
3390 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3391
3392 gcc_assert (ncopies >= 1);
3393
3394 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3395 if (! char_vectype)
3396 return false;
3397
3398 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3399 unsigned word_bytes;
3400 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3401 return false;
3402
3403 /* The encoding uses one stepped pattern for each byte in the word. */
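/* E.g. (illustrative) for a 4-byte word the encoded selector starts
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } and later elements continue
each byte's pattern with a step of 4. */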
3404 vec_perm_builder elts (num_bytes, word_bytes, 3);
3405 for (unsigned i = 0; i < 3; ++i)
3406 for (unsigned j = 0; j < word_bytes; ++j)
3407 elts.quick_push ((i + 1) * word_bytes - j - 1);
3408
3409 vec_perm_indices indices (elts, 1, num_bytes);
3410 machine_mode vmode = TYPE_MODE (char_vectype);
3411 if (!can_vec_perm_const_p (vmode, vmode, indices))
3412 return false;
3413
3414 if (! vec_stmt)
3415 {
3416 if (slp_node
3417 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3418 {
3419 if (dump_enabled_p ())
3420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3421 "incompatible vector types for invariants\n");
3422 return false;
3423 }
3424
3425 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3426 DUMP_VECT_SCOPE ("vectorizable_bswap");
3427 record_stmt_cost (cost_vec,
3428 1, vector_stmt, stmt_info, 0, vect_prologue);
3429 record_stmt_cost (cost_vec,
3430 slp_node
3431 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3432 vec_perm, stmt_info, 0, vect_body);
3433 return true;
3434 }
3435
3436 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3437
3438 /* Transform. */
3439 vec<tree> vec_oprnds = vNULL;
3440 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3441 op, &vec_oprnds);
3442 /* Arguments are ready. Create the new vector stmt. */
3443 unsigned i;
3444 tree vop;
3445 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3446 {
3447 gimple *new_stmt;
3448 tree tem = make_ssa_name (char_vectype);
3449 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3450 char_vectype, vop));
3451 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3452 tree tem2 = make_ssa_name (char_vectype);
3453 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3454 tem, tem, bswap_vconst);
3455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3456 tem = make_ssa_name (vectype);
3457 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3458 vectype, tem2));
3459 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3460 if (slp_node)
3461 slp_node->push_vec_def (new_stmt);
3462 else
3463 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3464 }
3465
3466 if (!slp_node)
3467 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3468
3469 vec_oprnds.release ();
3470 return true;
3471 }
3472
3473 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3474 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3475 in a single step. On success, store the binary pack code in
3476 *CONVERT_CODE. */
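/* For instance (illustrative), narrowing two V2DI inputs into one V4SI
output would typically use the binary VEC_PACK_TRUNC_EXPR code. */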
3477
3478 static bool
3479 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3480 code_helper *convert_code)
3481 {
3482 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3483 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3484 return false;
3485
3486 code_helper code;
3487 int multi_step_cvt = 0;
3488 auto_vec <tree, 8> interm_types;
3489 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3490 &code, &multi_step_cvt, &interm_types)
3491 || multi_step_cvt)
3492 return false;
3493
3494 *convert_code = code;
3495 return true;
3496 }
3497
3498 /* Function vectorizable_call.
3499
3500 Check if STMT_INFO performs a function call that can be vectorized.
3501 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3502 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3503 Return true if STMT_INFO is vectorizable in this way. */
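/* As an illustration (not tied to any particular target): a scalar call
such as a[i] = sqrtf (b[i]) may be replaced by a single .SQRT
internal-function call, or by a target builtin, operating on a whole
vector per copy. */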
3504
3505 static bool
3506 vectorizable_call (vec_info *vinfo,
3507 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3508 gimple **vec_stmt, slp_tree slp_node,
3509 stmt_vector_for_cost *cost_vec)
3510 {
3511 gcall *stmt;
3512 tree vec_dest;
3513 tree scalar_dest;
3514 tree op;
3515 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3516 tree vectype_out, vectype_in;
3517 poly_uint64 nunits_in;
3518 poly_uint64 nunits_out;
3519 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3520 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3521 tree fndecl, new_temp, rhs_type;
3522 enum vect_def_type dt[4]
3523 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3524 vect_unknown_def_type };
3525 tree vectypes[ARRAY_SIZE (dt)] = {};
3526 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3527 int ndts = ARRAY_SIZE (dt);
3528 int ncopies, j;
3529 auto_vec<tree, 8> vargs;
3530 enum { NARROW, NONE, WIDEN } modifier;
3531 size_t i, nargs;
3532 tree lhs;
3533
3534 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3535 return false;
3536
3537 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3538 && ! vec_stmt)
3539 return false;
3540
3541 /* Is STMT_INFO a vectorizable call? */
3542 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3543 if (!stmt)
3544 return false;
3545
3546 if (gimple_call_internal_p (stmt)
3547 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3548 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3549 /* Handled by vectorizable_load and vectorizable_store. */
3550 return false;
3551
3552 if (gimple_call_lhs (stmt) == NULL_TREE
3553 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3554 return false;
3555
3556 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3557
3558 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3559
3560 /* Process function arguments. */
3561 rhs_type = NULL_TREE;
3562 vectype_in = NULL_TREE;
3563 nargs = gimple_call_num_args (stmt);
3564
3565 /* Bail out if the function has more than four arguments; we do not have
3566 interesting builtin functions to vectorize with more than two arguments
3567 except for fma. Zero arguments is not handled either. */
3568 if (nargs == 0 || nargs > 4)
3569 return false;
3570
3571 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3572 combined_fn cfn = gimple_call_combined_fn (stmt);
3573 if (cfn == CFN_GOMP_SIMD_LANE)
3574 {
3575 nargs = 0;
3576 rhs_type = unsigned_type_node;
3577 }
3578
3579 int mask_opno = -1;
3580 if (internal_fn_p (cfn))
3581 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3582
3583 for (i = 0; i < nargs; i++)
3584 {
3585 if ((int) i == mask_opno)
3586 {
3587 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3588 &op, &slp_op[i], &dt[i], &vectypes[i]))
3589 return false;
3590 continue;
3591 }
3592
3593 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3594 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3595 {
3596 if (dump_enabled_p ())
3597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3598 "use not simple.\n");
3599 return false;
3600 }
3601
3602 /* We can only handle calls with arguments of the same type. */
3603 if (rhs_type
3604 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3605 {
3606 if (dump_enabled_p ())
3607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3608 "argument types differ.\n");
3609 return false;
3610 }
3611 if (!rhs_type)
3612 rhs_type = TREE_TYPE (op);
3613
3614 if (!vectype_in)
3615 vectype_in = vectypes[i];
3616 else if (vectypes[i]
3617 && !types_compatible_p (vectypes[i], vectype_in))
3618 {
3619 if (dump_enabled_p ())
3620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3621 "argument vector types differ.\n");
3622 return false;
3623 }
3624 }
3625 /* If all arguments are external or constant defs, infer the vector type
3626 from the scalar type. */
3627 if (!vectype_in)
3628 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3629 if (vec_stmt)
3630 gcc_assert (vectype_in);
3631 if (!vectype_in)
3632 {
3633 if (dump_enabled_p ())
3634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3635 "no vectype for scalar type %T\n", rhs_type);
3636
3637 return false;
3638 }
3639 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3640 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3641 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3642 by a pack of the two vectors into an SI vector. We would need
3643 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3644 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3645 {
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3648 "mismatched vector sizes %T and %T\n",
3649 vectype_in, vectype_out);
3650 return false;
3651 }
3652
3653 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3654 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3655 {
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3658 "mixed mask and nonmask vector types\n");
3659 return false;
3660 }
3661
3662 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3663 {
3664 if (dump_enabled_p ())
3665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3666 "use emulated vector type for call\n");
3667 return false;
3668 }
3669
3670 /* FORNOW */
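/* E.g. (illustrative) V4SI arguments with a V4SI result give NONE,
V2DI arguments with a V4SI result give NARROW, and V4SI arguments with
a V2DI result give WIDEN. */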
3671 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3673 if (known_eq (nunits_in * 2, nunits_out))
3674 modifier = NARROW;
3675 else if (known_eq (nunits_out, nunits_in))
3676 modifier = NONE;
3677 else if (known_eq (nunits_out * 2, nunits_in))
3678 modifier = WIDEN;
3679 else
3680 return false;
3681
3682 /* We only handle functions that do not read or clobber memory. */
3683 if (gimple_vuse (stmt))
3684 {
3685 if (dump_enabled_p ())
3686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3687 "function reads from or writes to memory.\n");
3688 return false;
3689 }
3690
3691 /* For now, we only vectorize functions if a target specific builtin
3692 is available. TODO -- in some cases, it might be profitable to
3693 insert the calls for pieces of the vector, in order to be able
3694 to vectorize other operations in the loop. */
3695 fndecl = NULL_TREE;
3696 internal_fn ifn = IFN_LAST;
3697 tree callee = gimple_call_fndecl (stmt);
3698
3699 /* First try using an internal function. */
3700 code_helper convert_code = MAX_TREE_CODES;
3701 if (cfn != CFN_LAST
3702 && (modifier == NONE
3703 || (modifier == NARROW
3704 && simple_integer_narrowing (vectype_out, vectype_in,
3705 &convert_code))))
3706 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3707 vectype_in);
3708
3709 /* If that fails, try asking for a target-specific built-in function. */
3710 if (ifn == IFN_LAST)
3711 {
3712 if (cfn != CFN_LAST)
3713 fndecl = targetm.vectorize.builtin_vectorized_function
3714 (cfn, vectype_out, vectype_in);
3715 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3716 fndecl = targetm.vectorize.builtin_md_vectorized_function
3717 (callee, vectype_out, vectype_in);
3718 }
3719
3720 if (ifn == IFN_LAST && !fndecl)
3721 {
3722 if (cfn == CFN_GOMP_SIMD_LANE
3723 && !slp_node
3724 && loop_vinfo
3725 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3726 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3727 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3728 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3729 {
3730 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3731 { 0, 1, 2, ... vf - 1 } vector. */
3732 gcc_assert (nargs == 0);
3733 }
3734 else if (modifier == NONE
3735 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3736 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3737 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3738 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3739 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3740 slp_op, vectype_in, cost_vec);
3741 else
3742 {
3743 if (dump_enabled_p ())
3744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3745 "function is not vectorizable.\n");
3746 return false;
3747 }
3748 }
3749
3750 if (slp_node)
3751 ncopies = 1;
3752 else if (modifier == NARROW && ifn == IFN_LAST)
3753 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3754 else
3755 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3756
3757 /* Sanity check: make sure that at least one copy of the vectorized stmt
3758 needs to be generated. */
3759 gcc_assert (ncopies >= 1);
3760
3761 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3762 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3763 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3764 int len_opno = internal_fn_len_index (cond_len_fn);
3765 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3766 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3767 if (!vec_stmt) /* transformation not required. */
3768 {
3769 if (slp_node)
3770 for (i = 0; i < nargs; ++i)
3771 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3772 vectypes[i]
3773 ? vectypes[i] : vectype_in))
3774 {
3775 if (dump_enabled_p ())
3776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3777 "incompatible vector types for invariants\n");
3778 return false;
3779 }
3780 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3781 DUMP_VECT_SCOPE ("vectorizable_call");
3782 vect_model_simple_cost (vinfo, stmt_info,
3783 ncopies, dt, ndts, slp_node, cost_vec);
3784 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3785 record_stmt_cost (cost_vec, ncopies / 2,
3786 vec_promote_demote, stmt_info, 0, vect_body);
3787
3788 if (loop_vinfo
3789 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3790 && (reduc_idx >= 0 || mask_opno >= 0))
3791 {
3792 if (reduc_idx >= 0
3793 && (cond_fn == IFN_LAST
3794 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3795 OPTIMIZE_FOR_SPEED))
3796 && (cond_len_fn == IFN_LAST
3797 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3798 OPTIMIZE_FOR_SPEED)))
3799 {
3800 if (dump_enabled_p ())
3801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3802 "can't use a fully-masked loop because no"
3803 " conditional operation is available.\n");
3804 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3805 }
3806 else
3807 {
3808 unsigned int nvectors
3809 = (slp_node
3810 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3811 : ncopies);
3812 tree scalar_mask = NULL_TREE;
3813 if (mask_opno >= 0)
3814 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3815 if (cond_len_fn != IFN_LAST
3816 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3817 OPTIMIZE_FOR_SPEED))
3818 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3819 1);
3820 else
3821 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3822 scalar_mask);
3823 }
3824 }
3825 return true;
3826 }
3827
3828 /* Transform. */
3829
3830 if (dump_enabled_p ())
3831 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3832
3833 /* Handle def. */
3834 scalar_dest = gimple_call_lhs (stmt);
3835 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3836
3837 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3838 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3839 unsigned int vect_nargs = nargs;
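/* Illustrative example (operand order per the internal-fn conventions):
in a length-controlled loop, .COND_ADD (mask, a, b, else) would become
.COND_LEN_ADD (mask, a, b, else, len, bias). */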
3840 if (len_loop_p)
3841 {
3842 if (len_opno >= 0)
3843 {
3844 ifn = cond_len_fn;
3845 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3846 vect_nargs += 2;
3847 }
3848 else if (reduc_idx >= 0)
3849 gcc_unreachable ();
3850 }
3851 else if (masked_loop_p && reduc_idx >= 0)
3852 {
3853 ifn = cond_fn;
3854 vect_nargs += 2;
3855 }
3856
3857 if (modifier == NONE || ifn != IFN_LAST)
3858 {
3859 tree prev_res = NULL_TREE;
3860 vargs.safe_grow (vect_nargs, true);
3861 auto_vec<vec<tree> > vec_defs (nargs);
3862 for (j = 0; j < ncopies; ++j)
3863 {
3864 /* Build argument list for the vectorized call. */
3865 if (slp_node)
3866 {
3867 vec<tree> vec_oprnds0;
3868
3869 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3870 vec_oprnds0 = vec_defs[0];
3871
3872 /* Arguments are ready. Create the new vector stmt. */
3873 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3874 {
3875 int varg = 0;
3876 if (masked_loop_p && reduc_idx >= 0)
3877 {
3878 unsigned int vec_num = vec_oprnds0.length ();
3879 /* Always true for SLP. */
3880 gcc_assert (ncopies == 1);
3881 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3882 gsi, masks, vec_num,
3883 vectype_out, i);
3884 }
3885 size_t k;
3886 for (k = 0; k < nargs; k++)
3887 {
3888 vec<tree> vec_oprndsk = vec_defs[k];
3889 vargs[varg++] = vec_oprndsk[i];
3890 }
3891 if (masked_loop_p && reduc_idx >= 0)
3892 vargs[varg++] = vargs[reduc_idx + 1];
3893 gimple *new_stmt;
3894 if (modifier == NARROW)
3895 {
3896 /* We don't define any narrowing conditional functions
3897 at present. */
3898 gcc_assert (mask_opno < 0);
3899 tree half_res = make_ssa_name (vectype_in);
3900 gcall *call
3901 = gimple_build_call_internal_vec (ifn, vargs);
3902 gimple_call_set_lhs (call, half_res);
3903 gimple_call_set_nothrow (call, true);
3904 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3905 if ((i & 1) == 0)
3906 {
3907 prev_res = half_res;
3908 continue;
3909 }
3910 new_temp = make_ssa_name (vec_dest);
3911 new_stmt = vect_gimple_build (new_temp, convert_code,
3912 prev_res, half_res);
3913 vect_finish_stmt_generation (vinfo, stmt_info,
3914 new_stmt, gsi);
3915 }
3916 else
3917 {
3918 if (len_opno >= 0 && len_loop_p)
3919 {
3920 unsigned int vec_num = vec_oprnds0.length ();
3921 /* Always true for SLP. */
3922 gcc_assert (ncopies == 1);
3923 tree len
3924 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3925 vectype_out, i, 1);
3926 signed char biasval
3927 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3928 tree bias = build_int_cst (intQI_type_node, biasval);
3929 vargs[len_opno] = len;
3930 vargs[len_opno + 1] = bias;
3931 }
3932 else if (mask_opno >= 0 && masked_loop_p)
3933 {
3934 unsigned int vec_num = vec_oprnds0.length ();
3935 /* Always true for SLP. */
3936 gcc_assert (ncopies == 1);
3937 tree mask = vect_get_loop_mask (loop_vinfo,
3938 gsi, masks, vec_num,
3939 vectype_out, i);
3940 vargs[mask_opno] = prepare_vec_mask
3941 (loop_vinfo, TREE_TYPE (mask), mask,
3942 vargs[mask_opno], gsi);
3943 }
3944
3945 gcall *call;
3946 if (ifn != IFN_LAST)
3947 call = gimple_build_call_internal_vec (ifn, vargs);
3948 else
3949 call = gimple_build_call_vec (fndecl, vargs);
3950 new_temp = make_ssa_name (vec_dest, call);
3951 gimple_call_set_lhs (call, new_temp);
3952 gimple_call_set_nothrow (call, true);
3953 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3954 new_stmt = call;
3955 }
3956 slp_node->push_vec_def (new_stmt);
3957 }
3958 continue;
3959 }
3960
3961 int varg = 0;
3962 if (masked_loop_p && reduc_idx >= 0)
3963 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3964 vectype_out, j);
3965 for (i = 0; i < nargs; i++)
3966 {
3967 op = gimple_call_arg (stmt, i);
3968 if (j == 0)
3969 {
3970 vec_defs.quick_push (vNULL);
3971 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3972 op, &vec_defs[i],
3973 vectypes[i]);
3974 }
3975 vargs[varg++] = vec_defs[i][j];
3976 }
3977 if (masked_loop_p && reduc_idx >= 0)
3978 vargs[varg++] = vargs[reduc_idx + 1];
3979
3980 if (len_opno >= 0 && len_loop_p)
3981 {
3982 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3983 vectype_out, j, 1);
3984 signed char biasval
3985 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3986 tree bias = build_int_cst (intQI_type_node, biasval);
3987 vargs[len_opno] = len;
3988 vargs[len_opno + 1] = bias;
3989 }
3990 else if (mask_opno >= 0 && masked_loop_p)
3991 {
3992 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3993 vectype_out, j);
3994 vargs[mask_opno]
3995 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3996 vargs[mask_opno], gsi);
3997 }
3998
3999 gimple *new_stmt;
4000 if (cfn == CFN_GOMP_SIMD_LANE)
4001 {
4002 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
4003 tree new_var
4004 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
4005 gimple *init_stmt = gimple_build_assign (new_var, cst);
4006 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
4007 new_temp = make_ssa_name (vec_dest);
4008 new_stmt = gimple_build_assign (new_temp, new_var);
4009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4010 }
4011 else if (modifier == NARROW)
4012 {
4013 /* We don't define any narrowing conditional functions at
4014 present. */
4015 gcc_assert (mask_opno < 0);
4016 tree half_res = make_ssa_name (vectype_in);
4017 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
4018 gimple_call_set_lhs (call, half_res);
4019 gimple_call_set_nothrow (call, true);
4020 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4021 if ((j & 1) == 0)
4022 {
4023 prev_res = half_res;
4024 continue;
4025 }
4026 new_temp = make_ssa_name (vec_dest);
4027 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
4028 half_res);
4029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4030 }
4031 else
4032 {
4033 gcall *call;
4034 if (ifn != IFN_LAST)
4035 call = gimple_build_call_internal_vec (ifn, vargs);
4036 else
4037 call = gimple_build_call_vec (fndecl, vargs);
4038 new_temp = make_ssa_name (vec_dest, call);
4039 gimple_call_set_lhs (call, new_temp);
4040 gimple_call_set_nothrow (call, true);
4041 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4042 new_stmt = call;
4043 }
4044
4045 if (j == (modifier == NARROW ? 1 : 0))
4046 *vec_stmt = new_stmt;
4047 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4048 }
4049 for (i = 0; i < nargs; i++)
4050 {
4051 vec<tree> vec_oprndsi = vec_defs[i];
4052 vec_oprndsi.release ();
4053 }
4054 }
4055 else if (modifier == NARROW)
4056 {
4057 auto_vec<vec<tree> > vec_defs (nargs);
4058 /* We don't define any narrowing conditional functions at present. */
4059 gcc_assert (mask_opno < 0);
4060 for (j = 0; j < ncopies; ++j)
4061 {
4062 /* Build argument list for the vectorized call. */
4063 if (j == 0)
4064 vargs.create (nargs * 2);
4065 else
4066 vargs.truncate (0);
4067
4068 if (slp_node)
4069 {
4070 vec<tree> vec_oprnds0;
4071
4072 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
4073 vec_oprnds0 = vec_defs[0];
4074
4075 /* Arguments are ready. Create the new vector stmt. */
4076 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
4077 {
4078 size_t k;
4079 vargs.truncate (0);
4080 for (k = 0; k < nargs; k++)
4081 {
4082 vec<tree> vec_oprndsk = vec_defs[k];
4083 vargs.quick_push (vec_oprndsk[i]);
4084 vargs.quick_push (vec_oprndsk[i + 1]);
4085 }
4086 gcall *call;
4087 if (ifn != IFN_LAST)
4088 call = gimple_build_call_internal_vec (ifn, vargs);
4089 else
4090 call = gimple_build_call_vec (fndecl, vargs);
4091 new_temp = make_ssa_name (vec_dest, call);
4092 gimple_call_set_lhs (call, new_temp);
4093 gimple_call_set_nothrow (call, true);
4094 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4095 slp_node->push_vec_def (call);
4096 }
4097 continue;
4098 }
4099
4100 for (i = 0; i < nargs; i++)
4101 {
4102 op = gimple_call_arg (stmt, i);
4103 if (j == 0)
4104 {
4105 vec_defs.quick_push (vNULL);
4106 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
4107 op, &vec_defs[i], vectypes[i]);
4108 }
4109 vec_oprnd0 = vec_defs[i][2*j];
4110 vec_oprnd1 = vec_defs[i][2*j+1];
4111
4112 vargs.quick_push (vec_oprnd0);
4113 vargs.quick_push (vec_oprnd1);
4114 }
4115
4116 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
4117 new_temp = make_ssa_name (vec_dest, new_stmt);
4118 gimple_call_set_lhs (new_stmt, new_temp);
4119 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4120
4121 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4122 }
4123
4124 if (!slp_node)
4125 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
4126
4127 for (i = 0; i < nargs; i++)
4128 {
4129 vec<tree> vec_oprndsi = vec_defs[i];
4130 vec_oprndsi.release ();
4131 }
4132 }
4133 else
4134 /* No current target implements this case. */
4135 return false;
4136
4137 vargs.release ();
4138
4139 /* The call in STMT might prevent it from being removed in dce.
4140 We however cannot remove it here, due to the way the ssa name
4141 it defines is mapped to the new definition. So just replace
4142 rhs of the statement with something harmless. */
4143
4144 if (slp_node)
4145 return true;
4146
4147 stmt_info = vect_orig_stmt (stmt_info);
4148 lhs = gimple_get_lhs (stmt_info->stmt);
4149
4150 gassign *new_stmt
4151 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4152 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
4153
4154 return true;
4155 }
4156
4157
4158 struct simd_call_arg_info
4159 {
4160 tree vectype;
4161 tree op;
4162 HOST_WIDE_INT linear_step;
4163 enum vect_def_type dt;
4164 unsigned int align;
4165 bool simd_lane_linear;
4166 };
4167
4168 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
4169 is linear within a simd lane (but not within the whole loop), note it in
4170 *ARGINFO. */
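/* An illustrative case: OP defined as base p+ ((sizetype) _lane * 8),
where _lane is the result of a .GOMP_SIMD_LANE (simduid) call, is
linear within a simd lane with linear_step 8 and op == base. */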
4171
4172 static void
4173 vect_simd_lane_linear (tree op, class loop *loop,
4174 struct simd_call_arg_info *arginfo)
4175 {
4176 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
4177
4178 if (!is_gimple_assign (def_stmt)
4179 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
4180 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
4181 return;
4182
4183 tree base = gimple_assign_rhs1 (def_stmt);
4184 HOST_WIDE_INT linear_step = 0;
4185 tree v = gimple_assign_rhs2 (def_stmt);
4186 while (TREE_CODE (v) == SSA_NAME)
4187 {
4188 tree t;
4189 def_stmt = SSA_NAME_DEF_STMT (v);
4190 if (is_gimple_assign (def_stmt))
4191 switch (gimple_assign_rhs_code (def_stmt))
4192 {
4193 case PLUS_EXPR:
4194 t = gimple_assign_rhs2 (def_stmt);
4195 if (linear_step || TREE_CODE (t) != INTEGER_CST)
4196 return;
4197 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
4198 v = gimple_assign_rhs1 (def_stmt);
4199 continue;
4200 case MULT_EXPR:
4201 t = gimple_assign_rhs2 (def_stmt);
4202 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
4203 return;
4204 linear_step = tree_to_shwi (t);
4205 v = gimple_assign_rhs1 (def_stmt);
4206 continue;
4207 CASE_CONVERT:
4208 t = gimple_assign_rhs1 (def_stmt);
4209 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
4210 || (TYPE_PRECISION (TREE_TYPE (v))
4211 < TYPE_PRECISION (TREE_TYPE (t))))
4212 return;
4213 if (!linear_step)
4214 linear_step = 1;
4215 v = t;
4216 continue;
4217 default:
4218 return;
4219 }
4220 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
4221 && loop->simduid
4222 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
4223 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
4224 == loop->simduid))
4225 {
4226 if (!linear_step)
4227 linear_step = 1;
4228 arginfo->linear_step = linear_step;
4229 arginfo->op = base;
4230 arginfo->simd_lane_linear = true;
4231 return;
4232 }
4233 }
4234 }
4235
4236 /* Return the number of elements in vector type VECTYPE, which is associated
4237 with a SIMD clone. At present these vectors always have a constant
4238 length. */
4239
4240 static unsigned HOST_WIDE_INT
4241 simd_clone_subparts (tree vectype)
4242 {
4243 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4244 }
4245
4246 /* Function vectorizable_simd_clone_call.
4247
4248 Check if STMT_INFO performs a function call that can be vectorized
4249 by calling a simd clone of the function.
4250 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4251 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4252 Return true if STMT_INFO is vectorizable in this way. */
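/* Illustrative example (the clone name is made up, following the vector
function ABI mangling): with "#pragma omp declare simd" on foo, a call
foo (x) in a loop vectorized with vf 4 may be replaced by a single call
to a clone such as _ZGVbN4v_foo that processes all four lanes at once. */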
4253
4254 static bool
4255 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4256 gimple_stmt_iterator *gsi,
4257 gimple **vec_stmt, slp_tree slp_node,
4258 stmt_vector_for_cost *)
4259 {
4260 tree vec_dest;
4261 tree scalar_dest;
4262 tree op, type;
4263 tree vec_oprnd0 = NULL_TREE;
4264 tree vectype;
4265 poly_uint64 nunits;
4266 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4267 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4268 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4269 tree fndecl, new_temp;
4270 int ncopies, j;
4271 auto_vec<simd_call_arg_info> arginfo;
4272 vec<tree> vargs = vNULL;
4273 size_t i, nargs;
4274 tree lhs, rtype, ratype;
4275 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4276 int arg_offset = 0;
4277
4278 /* Is STMT a vectorizable call? */
4279 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4280 if (!stmt)
4281 return false;
4282
4283 fndecl = gimple_call_fndecl (stmt);
4284 if (fndecl == NULL_TREE
4285 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4286 {
4287 fndecl = gimple_call_arg (stmt, 0);
4288 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4289 fndecl = TREE_OPERAND (fndecl, 0);
4290 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4291 arg_offset = 1;
4292 }
4293 if (fndecl == NULL_TREE)
4294 return false;
4295
4296 struct cgraph_node *node = cgraph_node::get (fndecl);
4297 if (node == NULL || node->simd_clones == NULL)
4298 return false;
4299
4300 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4301 return false;
4302
4303 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4304 && ! vec_stmt)
4305 return false;
4306
4307 if (gimple_call_lhs (stmt)
4308 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4309 return false;
4310
4311 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4312
4313 vectype = STMT_VINFO_VECTYPE (stmt_info);
4314
4315 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4316 return false;
4317
4318 /* FORNOW */
4319 if (slp_node)
4320 return false;
4321
4322 /* Process function arguments. */
4323 nargs = gimple_call_num_args (stmt) - arg_offset;
4324
4325 /* Bail out if the function has zero arguments. */
4326 if (nargs == 0)
4327 return false;
4328
4329 arginfo.reserve (nargs, true);
4330
4331 for (i = 0; i < nargs; i++)
4332 {
4333 simd_call_arg_info thisarginfo;
4334 affine_iv iv;
4335
4336 thisarginfo.linear_step = 0;
4337 thisarginfo.align = 0;
4338 thisarginfo.op = NULL_TREE;
4339 thisarginfo.simd_lane_linear = false;
4340
4341 op = gimple_call_arg (stmt, i + arg_offset);
4342 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4343 &thisarginfo.vectype)
4344 || thisarginfo.dt == vect_uninitialized_def)
4345 {
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4348 "use not simple.\n");
4349 return false;
4350 }
4351
4352 if (thisarginfo.dt == vect_constant_def
4353 || thisarginfo.dt == vect_external_def)
4354 gcc_assert (thisarginfo.vectype == NULL_TREE);
4355 else
4356 gcc_assert (thisarginfo.vectype != NULL_TREE);
4357
4358 /* For linear arguments, the analyze phase should have saved
4359 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4360 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4361 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4362 {
4363 gcc_assert (vec_stmt);
4364 thisarginfo.linear_step
4365 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4366 thisarginfo.op
4367 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4368 thisarginfo.simd_lane_linear
4369 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4370 == boolean_true_node);
4371 /* If loop has been peeled for alignment, we need to adjust it. */
4372 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4373 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4374 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4375 {
4376 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4377 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4378 tree opt = TREE_TYPE (thisarginfo.op);
4379 bias = fold_convert (TREE_TYPE (step), bias);
4380 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4381 thisarginfo.op
4382 = fold_build2 (POINTER_TYPE_P (opt)
4383 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4384 thisarginfo.op, bias);
4385 }
4386 }
4387 else if (!vec_stmt
4388 && thisarginfo.dt != vect_constant_def
4389 && thisarginfo.dt != vect_external_def
4390 && loop_vinfo
4391 && TREE_CODE (op) == SSA_NAME
4392 && simple_iv (loop, loop_containing_stmt (stmt), op,
4393 &iv, false)
4394 && tree_fits_shwi_p (iv.step))
4395 {
4396 thisarginfo.linear_step = tree_to_shwi (iv.step);
4397 thisarginfo.op = iv.base;
4398 }
4399 else if ((thisarginfo.dt == vect_constant_def
4400 || thisarginfo.dt == vect_external_def)
4401 && POINTER_TYPE_P (TREE_TYPE (op)))
4402 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4403 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4404 linear too. */
4405 if (POINTER_TYPE_P (TREE_TYPE (op))
4406 && !thisarginfo.linear_step
4407 && !vec_stmt
4408 && thisarginfo.dt != vect_constant_def
4409 && thisarginfo.dt != vect_external_def
4410 && loop_vinfo
4411 && !slp_node
4412 && TREE_CODE (op) == SSA_NAME)
4413 vect_simd_lane_linear (op, loop, &thisarginfo);
4414
4415 arginfo.quick_push (thisarginfo);
4416 }
4417
4418 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4419 if (!vf.is_constant ())
4420 {
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4423 "not considering SIMD clones; not yet supported"
4424 " for variable-width vectors.\n");
4425 return false;
4426 }
4427
4428 unsigned int badness = 0;
4429 struct cgraph_node *bestn = NULL;
4430 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4431 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4432 else
4433 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4434 n = n->simdclone->next_clone)
4435 {
4436 unsigned int this_badness = 0;
4437 unsigned int num_calls;
4438 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4439 || n->simdclone->nargs != nargs)
4440 continue;
4441 if (num_calls != 1)
4442 this_badness += exact_log2 (num_calls) * 4096;
4443 if (n->simdclone->inbranch)
4444 this_badness += 8192;
4445 int target_badness = targetm.simd_clone.usable (n);
4446 if (target_badness < 0)
4447 continue;
4448 this_badness += target_badness * 512;
4449 for (i = 0; i < nargs; i++)
4450 {
4451 switch (n->simdclone->args[i].arg_type)
4452 {
4453 case SIMD_CLONE_ARG_TYPE_VECTOR:
4454 if (!useless_type_conversion_p
4455 (n->simdclone->args[i].orig_type,
4456 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4457 i = -1;
4458 else if (arginfo[i].dt == vect_constant_def
4459 || arginfo[i].dt == vect_external_def
4460 || arginfo[i].linear_step)
4461 this_badness += 64;
4462 break;
4463 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4464 if (arginfo[i].dt != vect_constant_def
4465 && arginfo[i].dt != vect_external_def)
4466 i = -1;
4467 break;
4468 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4469 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4470 if (arginfo[i].dt == vect_constant_def
4471 || arginfo[i].dt == vect_external_def
4472 || (arginfo[i].linear_step
4473 != n->simdclone->args[i].linear_step))
4474 i = -1;
4475 break;
4476 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4477 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4478 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4479 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4480 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4481 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4482 /* FORNOW */
4483 i = -1;
4484 break;
4485 case SIMD_CLONE_ARG_TYPE_MASK:
4486 break;
4487 }
4488 if (i == (size_t) -1)
4489 break;
4490 if (n->simdclone->args[i].alignment > arginfo[i].align)
4491 {
4492 i = -1;
4493 break;
4494 }
4495 if (arginfo[i].align)
4496 this_badness += (exact_log2 (arginfo[i].align)
4497 - exact_log2 (n->simdclone->args[i].alignment));
4498 }
4499 if (i == (size_t) -1)
4500 continue;
4501 if (bestn == NULL || this_badness < badness)
4502 {
4503 bestn = n;
4504 badness = this_badness;
4505 }
4506 }
4507
4508 if (bestn == NULL)
4509 return false;
4510
4511 for (i = 0; i < nargs; i++)
4512 {
4513 if ((arginfo[i].dt == vect_constant_def
4514 || arginfo[i].dt == vect_external_def)
4515 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4516 {
4517 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4518 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4519 slp_node);
4520 if (arginfo[i].vectype == NULL
4521 || !constant_multiple_p (bestn->simdclone->simdlen,
4522 simd_clone_subparts (arginfo[i].vectype)))
4523 return false;
4524 }
4525
4526 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4527 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4528 {
4529 if (dump_enabled_p ())
4530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4531 "vector mask arguments are not supported.\n");
4532 return false;
4533 }
4534
4535 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4536 && bestn->simdclone->mask_mode == VOIDmode
4537 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4538 != simd_clone_subparts (arginfo[i].vectype)))
4539 {
4540 /* FORNOW we only have partial support for vector-type masks that
4541 can't hold all of simdlen. */
4542 if (dump_enabled_p ())
4543 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4544 vect_location,
4545 "in-branch vector clones are not yet"
4546 " supported for mismatched vector sizes.\n");
4547 return false;
4548 }
4549 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4550 && bestn->simdclone->mask_mode != VOIDmode)
4551 {
4552 /* FORNOW don't support integer-type masks. */
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4555 vect_location,
4556 "in-branch vector clones are not yet"
4557 " supported for integer mask modes.\n");
4558 return false;
4559 }
4560 }
4561
4562 fndecl = bestn->decl;
4563 nunits = bestn->simdclone->simdlen;
4564 ncopies = vector_unroll_factor (vf, nunits);
4565
4566 /* If the function isn't const, only allow it in simd loops where the
4567 user has asserted that at least nunits consecutive iterations can be
4568 performed using SIMD instructions. */
4569 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4570 && gimple_vuse (stmt))
4571 return false;
4572
4573 /* Sanity check: make sure that at least one copy of the vectorized stmt
4574 needs to be generated. */
4575 gcc_assert (ncopies >= 1);
4576
4577 if (!vec_stmt) /* transformation not required. */
4578 {
4579 /* When the original call is pure or const but the SIMD ABI dictates
4580 an aggregate return, we will have to use a virtual definition and
4581 in a loop eventually even need to add a virtual PHI. That's
4582 not straightforward, so allow this to be fixed up via renaming. */
4583 if (gimple_call_lhs (stmt)
4584 && !gimple_vdef (stmt)
4585 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4586 vinfo->any_known_not_updated_vssa = true;
4587 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4588 for (i = 0; i < nargs; i++)
4589 if ((bestn->simdclone->args[i].arg_type
4590 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4591 || (bestn->simdclone->args[i].arg_type
4592 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4593 {
4594 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4595 + 1,
4596 true);
4597 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4598 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4599 ? size_type_node : TREE_TYPE (arginfo[i].op);
4600 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4601 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4602 tree sll = arginfo[i].simd_lane_linear
4603 ? boolean_true_node : boolean_false_node;
4604 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4605 }
4606 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4607 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4608 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4609 dt, slp_node, cost_vec); */
4610 return true;
4611 }
4612
4613 /* Transform. */
4614
4615 if (dump_enabled_p ())
4616 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4617
4618 /* Handle def. */
4619 scalar_dest = gimple_call_lhs (stmt);
4620 vec_dest = NULL_TREE;
4621 rtype = NULL_TREE;
4622 ratype = NULL_TREE;
4623 if (scalar_dest)
4624 {
4625 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4626 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4627 if (TREE_CODE (rtype) == ARRAY_TYPE)
4628 {
4629 ratype = rtype;
4630 rtype = TREE_TYPE (ratype);
4631 }
4632 }
4633
4634 auto_vec<vec<tree> > vec_oprnds;
4635 auto_vec<unsigned> vec_oprnds_i;
4636 vec_oprnds.safe_grow_cleared (nargs, true);
4637 vec_oprnds_i.safe_grow_cleared (nargs, true);
4638 for (j = 0; j < ncopies; ++j)
4639 {
4640 /* Build argument list for the vectorized call. */
4641 if (j == 0)
4642 vargs.create (nargs);
4643 else
4644 vargs.truncate (0);
4645
4646 for (i = 0; i < nargs; i++)
4647 {
4648 unsigned int k, l, m, o;
4649 tree atype;
4650 op = gimple_call_arg (stmt, i + arg_offset);
4651 switch (bestn->simdclone->args[i].arg_type)
4652 {
4653 case SIMD_CLONE_ARG_TYPE_VECTOR:
4654 atype = bestn->simdclone->args[i].vector_type;
4655 o = vector_unroll_factor (nunits,
4656 simd_clone_subparts (atype));
4657 for (m = j * o; m < (j + 1) * o; m++)
4658 {
4659 if (simd_clone_subparts (atype)
4660 < simd_clone_subparts (arginfo[i].vectype))
4661 {
4662 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4663 k = (simd_clone_subparts (arginfo[i].vectype)
4664 / simd_clone_subparts (atype));
4665 gcc_assert ((k & (k - 1)) == 0);
4666 if (m == 0)
4667 {
4668 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4669 ncopies * o / k, op,
4670 &vec_oprnds[i]);
4671 vec_oprnds_i[i] = 0;
4672 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4673 }
4674 else
4675 {
4676 vec_oprnd0 = arginfo[i].op;
4677 if ((m & (k - 1)) == 0)
4678 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4679 }
4680 arginfo[i].op = vec_oprnd0;
4681 vec_oprnd0
4682 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4683 bitsize_int (prec),
4684 bitsize_int ((m & (k - 1)) * prec));
4685 gassign *new_stmt
4686 = gimple_build_assign (make_ssa_name (atype),
4687 vec_oprnd0);
4688 vect_finish_stmt_generation (vinfo, stmt_info,
4689 new_stmt, gsi);
4690 vargs.safe_push (gimple_assign_lhs (new_stmt));
4691 }
4692 else
4693 {
4694 k = (simd_clone_subparts (atype)
4695 / simd_clone_subparts (arginfo[i].vectype));
4696 gcc_assert ((k & (k - 1)) == 0);
4697 vec<constructor_elt, va_gc> *ctor_elts;
4698 if (k != 1)
4699 vec_alloc (ctor_elts, k);
4700 else
4701 ctor_elts = NULL;
4702 for (l = 0; l < k; l++)
4703 {
4704 if (m == 0 && l == 0)
4705 {
4706 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4707 k * o * ncopies,
4708 op,
4709 &vec_oprnds[i]);
4710 vec_oprnds_i[i] = 0;
4711 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4712 }
4713 else
4714 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4715 arginfo[i].op = vec_oprnd0;
4716 if (k == 1)
4717 break;
4718 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4719 vec_oprnd0);
4720 }
4721 if (k == 1)
4722 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4723 atype))
4724 {
4725 vec_oprnd0
4726 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4727 gassign *new_stmt
4728 = gimple_build_assign (make_ssa_name (atype),
4729 vec_oprnd0);
4730 vect_finish_stmt_generation (vinfo, stmt_info,
4731 new_stmt, gsi);
4732 vargs.safe_push (gimple_assign_lhs (new_stmt));
4733 }
4734 else
4735 vargs.safe_push (vec_oprnd0);
4736 else
4737 {
4738 vec_oprnd0 = build_constructor (atype, ctor_elts);
4739 gassign *new_stmt
4740 = gimple_build_assign (make_ssa_name (atype),
4741 vec_oprnd0);
4742 vect_finish_stmt_generation (vinfo, stmt_info,
4743 new_stmt, gsi);
4744 vargs.safe_push (gimple_assign_lhs (new_stmt));
4745 }
4746 }
4747 }
4748 break;
4749 case SIMD_CLONE_ARG_TYPE_MASK:
4750 atype = bestn->simdclone->args[i].vector_type;
4751 if (bestn->simdclone->mask_mode != VOIDmode)
4752 {
4753 /* FORNOW: this is disabled above. */
4754 gcc_unreachable ();
4755 }
4756 else
4757 {
4758 tree elt_type = TREE_TYPE (atype);
4759 tree one = fold_convert (elt_type, integer_one_node);
4760 tree zero = fold_convert (elt_type, integer_zero_node);
4761 o = vector_unroll_factor (nunits,
4762 simd_clone_subparts (atype));
4763 for (m = j * o; m < (j + 1) * o; m++)
4764 {
4765 if (simd_clone_subparts (atype)
4766 < simd_clone_subparts (arginfo[i].vectype))
4767 {
4768 /* The mask type has fewer elements than simdlen. */
4769
4770 /* FORNOW */
4771 gcc_unreachable ();
4772 }
4773 else if (simd_clone_subparts (atype)
4774 == simd_clone_subparts (arginfo[i].vectype))
4775 {
4776 /* The SIMD clone function has the same number of
4777 elements as the current function. */
4778 if (m == 0)
4779 {
4780 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4781 o * ncopies,
4782 op,
4783 &vec_oprnds[i]);
4784 vec_oprnds_i[i] = 0;
4785 }
4786 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4787 vec_oprnd0
4788 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4789 build_vector_from_val (atype, one),
4790 build_vector_from_val (atype, zero));
4791 gassign *new_stmt
4792 = gimple_build_assign (make_ssa_name (atype),
4793 vec_oprnd0);
4794 vect_finish_stmt_generation (vinfo, stmt_info,
4795 new_stmt, gsi);
4796 vargs.safe_push (gimple_assign_lhs (new_stmt));
4797 }
4798 else
4799 {
4800 /* The mask type has more elements than simdlen. */
4801
4802 /* FORNOW */
4803 gcc_unreachable ();
4804 }
4805 }
4806 }
4807 break;
4808 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4809 vargs.safe_push (op);
4810 break;
4811 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4812 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4813 if (j == 0)
4814 {
4815 gimple_seq stmts;
4816 arginfo[i].op
4817 = force_gimple_operand (unshare_expr (arginfo[i].op),
4818 &stmts, true, NULL_TREE);
4819 if (stmts != NULL)
4820 {
4821 basic_block new_bb;
4822 edge pe = loop_preheader_edge (loop);
4823 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4824 gcc_assert (!new_bb);
4825 }
4826 if (arginfo[i].simd_lane_linear)
4827 {
4828 vargs.safe_push (arginfo[i].op);
4829 break;
4830 }
4831 tree phi_res = copy_ssa_name (op);
4832 gphi *new_phi = create_phi_node (phi_res, loop->header);
4833 add_phi_arg (new_phi, arginfo[i].op,
4834 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4835 enum tree_code code
4836 = POINTER_TYPE_P (TREE_TYPE (op))
4837 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4838 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4839 ? sizetype : TREE_TYPE (op);
4840 poly_widest_int cst
4841 = wi::mul (bestn->simdclone->args[i].linear_step,
4842 ncopies * nunits);
4843 tree tcst = wide_int_to_tree (type, cst);
4844 tree phi_arg = copy_ssa_name (op);
4845 gassign *new_stmt
4846 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4847 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4848 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4849 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4850 UNKNOWN_LOCATION);
4851 arginfo[i].op = phi_res;
4852 vargs.safe_push (phi_res);
4853 }
4854 else
4855 {
4856 enum tree_code code
4857 = POINTER_TYPE_P (TREE_TYPE (op))
4858 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4859 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4860 ? sizetype : TREE_TYPE (op);
4861 poly_widest_int cst
4862 = wi::mul (bestn->simdclone->args[i].linear_step,
4863 j * nunits);
4864 tree tcst = wide_int_to_tree (type, cst);
4865 new_temp = make_ssa_name (TREE_TYPE (op));
4866 gassign *new_stmt
4867 = gimple_build_assign (new_temp, code,
4868 arginfo[i].op, tcst);
4869 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4870 vargs.safe_push (new_temp);
4871 }
4872 break;
4873 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4874 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4875 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4876 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4877 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4878 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4879 default:
4880 gcc_unreachable ();
4881 }
4882 }
4883
4884 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4885 if (vec_dest)
4886 {
4887 gcc_assert (ratype
4888 || known_eq (simd_clone_subparts (rtype), nunits));
4889 if (ratype)
4890 new_temp = create_tmp_var (ratype);
4891 else if (useless_type_conversion_p (vectype, rtype))
4892 new_temp = make_ssa_name (vec_dest, new_call);
4893 else
4894 new_temp = make_ssa_name (rtype, new_call);
4895 gimple_call_set_lhs (new_call, new_temp);
4896 }
4897 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4898 gimple *new_stmt = new_call;
4899
4900 if (vec_dest)
4901 {
4902 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4903 {
4904 unsigned int k, l;
4905 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4906 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4907 k = vector_unroll_factor (nunits,
4908 simd_clone_subparts (vectype));
4909 gcc_assert ((k & (k - 1)) == 0);
4910 for (l = 0; l < k; l++)
4911 {
4912 tree t;
4913 if (ratype)
4914 {
4915 t = build_fold_addr_expr (new_temp);
4916 t = build2 (MEM_REF, vectype, t,
4917 build_int_cst (TREE_TYPE (t), l * bytes));
4918 }
4919 else
4920 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4921 bitsize_int (prec), bitsize_int (l * prec));
4922 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4923 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4924
4925 if (j == 0 && l == 0)
4926 *vec_stmt = new_stmt;
4927 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4928 }
4929
4930 if (ratype)
4931 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4932 continue;
4933 }
4934 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4935 {
4936 unsigned int k = (simd_clone_subparts (vectype)
4937 / simd_clone_subparts (rtype));
4938 gcc_assert ((k & (k - 1)) == 0);
4939 if ((j & (k - 1)) == 0)
4940 vec_alloc (ret_ctor_elts, k);
4941 if (ratype)
4942 {
4943 unsigned int m, o;
4944 o = vector_unroll_factor (nunits,
4945 simd_clone_subparts (rtype));
4946 for (m = 0; m < o; m++)
4947 {
4948 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4949 size_int (m), NULL_TREE, NULL_TREE);
4950 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4951 tem);
4952 vect_finish_stmt_generation (vinfo, stmt_info,
4953 new_stmt, gsi);
4954 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4955 gimple_assign_lhs (new_stmt));
4956 }
4957 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4958 }
4959 else
4960 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4961 if ((j & (k - 1)) != k - 1)
4962 continue;
4963 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4964 new_stmt
4965 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4966 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4967
4968 if ((unsigned) j == k - 1)
4969 *vec_stmt = new_stmt;
4970 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4971 continue;
4972 }
4973 else if (ratype)
4974 {
4975 tree t = build_fold_addr_expr (new_temp);
4976 t = build2 (MEM_REF, vectype, t,
4977 build_int_cst (TREE_TYPE (t), 0));
4978 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4979 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4980 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4981 }
4982 else if (!useless_type_conversion_p (vectype, rtype))
4983 {
4984 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4985 new_stmt
4986 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4988 }
4989 }
4990
4991 if (j == 0)
4992 *vec_stmt = new_stmt;
4993 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4994 }
4995
4996 for (i = 0; i < nargs; ++i)
4997 {
4998 vec<tree> oprndsi = vec_oprnds[i];
4999 oprndsi.release ();
5000 }
5001 vargs.release ();
5002
5003 /* Mark the clone as no longer being a candidate for GC. */
5004 bestn->gc_candidate = false;
5005
5006 /* The call in STMT might prevent it from being removed in dce.
5007 We however cannot remove it here, due to the way the ssa name
5008 it defines is mapped to the new definition. So just replace
5009 rhs of the statement with something harmless. */
5010
5011 if (slp_node)
5012 return true;
5013
5014 gimple *new_stmt;
5015 if (scalar_dest)
5016 {
5017 type = TREE_TYPE (scalar_dest);
5018 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5019 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5020 }
5021 else
5022 new_stmt = gimple_build_nop ();
5023 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5024 unlink_stmt_vdef (stmt);
5025
5026 return true;
5027 }
5028
5029
5030 /* Function vect_gen_widened_results_half
5031
5032 Create a vector stmt whose code, number of arguments, and result
5033 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
5034 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5035 If CH names an internal function rather than a tree code, a call to that
5036 function is created instead of an assignment.
5037 STMT_INFO is the original scalar stmt that we are vectorizing. */
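/* CH is typically one of the _LO/_HI (or _EVEN/_ODD) codes returned by
supportable_widening_operation, selecting which half of the widened
result this statement computes. */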
5038
5039 static gimple *
5040 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5041 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5042 tree vec_dest, gimple_stmt_iterator *gsi,
5043 stmt_vec_info stmt_info)
5044 {
5045 gimple *new_stmt;
5046 tree new_temp;
5047
5048 /* Generate half of the widened result: */
5049 if (op_type != binary_op)
5050 vec_oprnd1 = NULL;
5051 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5052 new_temp = make_ssa_name (vec_dest, new_stmt);
5053 gimple_set_lhs (new_stmt, new_temp);
5054 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5055
5056 return new_stmt;
5057 }
5058
5059
5060 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5061 For multi-step conversions store the resulting vectors and call the function
5062 recursively. When NARROW_SRC_P is true, there is still a conversion after
5063 narrowing, so don't store the vectors in the SLP_NODE or in the vector
5064 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
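/* For instance, demoting int elements to char typically takes two packing
steps (int -> short -> char); each level combines pairs of input vectors
with VEC_PACK_TRUNC_EXPR, halving the number of vectors. */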
5065
5066 static void
5067 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5068 int multi_step_cvt,
5069 stmt_vec_info stmt_info,
5070 vec<tree> &vec_dsts,
5071 gimple_stmt_iterator *gsi,
5072 slp_tree slp_node, code_helper code,
5073 bool narrow_src_p)
5074 {
5075 unsigned int i;
5076 tree vop0, vop1, new_tmp, vec_dest;
5077
5078 vec_dest = vec_dsts.pop ();
5079
5080 for (i = 0; i < vec_oprnds->length (); i += 2)
5081 {
5082 /* Create demotion operation. */
5083 vop0 = (*vec_oprnds)[i];
5084 vop1 = (*vec_oprnds)[i + 1];
5085 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5086 new_tmp = make_ssa_name (vec_dest, new_stmt);
5087 gimple_set_lhs (new_stmt, new_tmp);
5088 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5089 if (multi_step_cvt || narrow_src_p)
5090 /* Store the resulting vector for the next recursive call,
5091 or keep it for the final FLOAT_EXPR in the NARROW_SRC case. */
5092 (*vec_oprnds)[i/2] = new_tmp;
5093 else
5094 {
5095 /* This is the last step of the conversion sequence. Store the
5096 vectors in SLP_NODE or in vector info of the scalar statement
5097 (or in STMT_VINFO_RELATED_STMT chain). */
5098 if (slp_node)
5099 slp_node->push_vec_def (new_stmt);
5100 else
5101 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5102 }
5103 }
5104
5105 /* For multi-step demotion operations we first generate demotion operations
5106 from the source type to the intermediate types, and then combine the
5107 results (stored in VEC_OPRNDS) with a further demotion operation to the
5108 destination type. */
5109 if (multi_step_cvt)
5110 {
5111 /* At each level of recursion we have half of the operands we had at the
5112 previous level. */
5113 vec_oprnds->truncate ((i+1)/2);
5114 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5115 multi_step_cvt - 1,
5116 stmt_info, vec_dsts, gsi,
5117 slp_node, VEC_PACK_TRUNC_EXPR,
5118 narrow_src_p);
5119 }
5120
5121 vec_dsts.quick_push (vec_dest);
5122 }
5123
5124
5125 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5126 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5127 STMT_INFO. For multi-step conversions store the resulting vectors and
5128 call the function recursively. */
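/* CH1/CH2 are normally the _LO and _HI variants of the widening operation,
so each input vector yields two output vectors (e.g. one V8HI operand
produces two V4SI results). */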
5129
5130 static void
5131 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5132 vec<tree> *vec_oprnds0,
5133 vec<tree> *vec_oprnds1,
5134 stmt_vec_info stmt_info, tree vec_dest,
5135 gimple_stmt_iterator *gsi,
5136 code_helper ch1,
5137 code_helper ch2, int op_type)
5138 {
5139 int i;
5140 tree vop0, vop1, new_tmp1, new_tmp2;
5141 gimple *new_stmt1, *new_stmt2;
5142 vec<tree> vec_tmp = vNULL;
5143
5144 vec_tmp.create (vec_oprnds0->length () * 2);
5145 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5146 {
5147 if (op_type == binary_op)
5148 vop1 = (*vec_oprnds1)[i];
5149 else
5150 vop1 = NULL_TREE;
5151
5152 /* Generate the two halves of promotion operation. */
5153 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5154 op_type, vec_dest, gsi,
5155 stmt_info);
5156 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5157 op_type, vec_dest, gsi,
5158 stmt_info);
5159 if (is_gimple_call (new_stmt1))
5160 {
5161 new_tmp1 = gimple_call_lhs (new_stmt1);
5162 new_tmp2 = gimple_call_lhs (new_stmt2);
5163 }
5164 else
5165 {
5166 new_tmp1 = gimple_assign_lhs (new_stmt1);
5167 new_tmp2 = gimple_assign_lhs (new_stmt2);
5168 }
5169
5170 /* Store the results for the next step. */
5171 vec_tmp.quick_push (new_tmp1);
5172 vec_tmp.quick_push (new_tmp2);
5173 }
5174
5175 vec_oprnds0->release ();
5176 *vec_oprnds0 = vec_tmp;
5177 }
5178
5179 /* Create vectorized promotion stmts for widening stmts using only half the
5180 potential vector size for input. */
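/* This is used when the input vector already has as many elements as the
output (e.g. a V4HI input for a V4SI result): each operand is simply
converted to the wider element type, so no unpacking is needed. */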
5181 static void
5182 vect_create_half_widening_stmts (vec_info *vinfo,
5183 vec<tree> *vec_oprnds0,
5184 vec<tree> *vec_oprnds1,
5185 stmt_vec_info stmt_info, tree vec_dest,
5186 gimple_stmt_iterator *gsi,
5187 code_helper code1,
5188 int op_type)
5189 {
5190 int i;
5191 tree vop0, vop1;
5192 gimple *new_stmt1;
5193 gimple *new_stmt2;
5194 gimple *new_stmt3;
5195 vec<tree> vec_tmp = vNULL;
5196
5197 vec_tmp.create (vec_oprnds0->length ());
5198 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5199 {
5200 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5201
5202 gcc_assert (op_type == binary_op);
5203 vop1 = (*vec_oprnds1)[i];
5204
5205 /* Widen the first vector input. */
5206 out_type = TREE_TYPE (vec_dest);
5207 new_tmp1 = make_ssa_name (out_type);
5208 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5209 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5210 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5211 {
5212 /* Widen the second vector input. */
5213 new_tmp2 = make_ssa_name (out_type);
5214 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5216 /* Perform the operation with both vector inputs widened. */
5217 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5218 }
5219 else
5220 {
5221 /* Perform the operation with the single vector input widened. */
5222 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5223 }
5224
5225 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5226 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5227 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5228
5229 /* Store the results for the next step. */
5230 vec_tmp.quick_push (new_tmp3);
5231 }
5232
5233 vec_oprnds0->release ();
5234 *vec_oprnds0 = vec_tmp;
5235 }
5236
5237
5238 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5239 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5240 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5241 Return true if STMT_INFO is vectorizable in this way. */
5242
5243 static bool
5244 vectorizable_conversion (vec_info *vinfo,
5245 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5246 gimple **vec_stmt, slp_tree slp_node,
5247 stmt_vector_for_cost *cost_vec)
5248 {
5249 tree vec_dest, cvt_op = NULL_TREE;
5250 tree scalar_dest;
5251 tree op0, op1 = NULL_TREE;
5252 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5253 tree_code tc1, tc2;
5254 code_helper code, code1, code2;
5255 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5256 tree new_temp;
5257 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5258 int ndts = 2;
5259 poly_uint64 nunits_in;
5260 poly_uint64 nunits_out;
5261 tree vectype_out, vectype_in;
5262 int ncopies, i;
5263 tree lhs_type, rhs_type;
5264 /* For conversions between floating point and integer, there are two NARROW
5265 cases. NARROW_SRC is for FLOAT_EXPR and means
5266 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5267 This is safe when the range of the source integer fits into the lower
5268 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5269 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5270 For other conversions, when there is narrowing, NARROW_DST is used by
5271 default. */
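/* For instance, (float) x where x is a long whose value range fits in int
can be handled as NARROW_SRC (pack the longs to ints, then convert),
while (int) d for a double d is NARROW_DST (convert, then pack). */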
5272 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5273 vec<tree> vec_oprnds0 = vNULL;
5274 vec<tree> vec_oprnds1 = vNULL;
5275 tree vop0;
5276 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5277 int multi_step_cvt = 0;
5278 vec<tree> interm_types = vNULL;
5279 tree intermediate_type, cvt_type = NULL_TREE;
5280 int op_type;
5281 unsigned short fltsz;
5282
5283 /* Is STMT a vectorizable conversion? */
5284
5285 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5286 return false;
5287
5288 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5289 && ! vec_stmt)
5290 return false;
5291
5292 gimple* stmt = stmt_info->stmt;
5293 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5294 return false;
5295
5296 if (gimple_get_lhs (stmt) == NULL_TREE
5297 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5298 return false;
5299
5303 if (is_gimple_assign (stmt))
5304 {
5305 code = gimple_assign_rhs_code (stmt);
5306 op_type = TREE_CODE_LENGTH ((tree_code) code);
5307 }
5308 else if (gimple_call_internal_p (stmt))
5309 {
5310 code = gimple_call_internal_fn (stmt);
5311 op_type = gimple_call_num_args (stmt);
5312 }
5313 else
5314 return false;
5315
5316 bool widen_arith = (code == WIDEN_MULT_EXPR
5317 || code == WIDEN_LSHIFT_EXPR
5318 || widening_fn_p (code));
5319
5320 if (!widen_arith
5321 && !CONVERT_EXPR_CODE_P (code)
5322 && code != FIX_TRUNC_EXPR
5323 && code != FLOAT_EXPR)
5324 return false;
5325
5326 /* Check types of lhs and rhs. */
5327 scalar_dest = gimple_get_lhs (stmt);
5328 lhs_type = TREE_TYPE (scalar_dest);
5329 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5330
5331 /* Check the operands of the operation. */
5332 slp_tree slp_op0, slp_op1 = NULL;
5333 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5334 0, &op0, &slp_op0, &dt[0], &vectype_in))
5335 {
5336 if (dump_enabled_p ())
5337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5338 "use not simple.\n");
5339 return false;
5340 }
5341
5342 rhs_type = TREE_TYPE (op0);
5343 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5344 && !((INTEGRAL_TYPE_P (lhs_type)
5345 && INTEGRAL_TYPE_P (rhs_type))
5346 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5347 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5348 return false;
5349
5350 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5351 && ((INTEGRAL_TYPE_P (lhs_type)
5352 && !type_has_mode_precision_p (lhs_type))
5353 || (INTEGRAL_TYPE_P (rhs_type)
5354 && !type_has_mode_precision_p (rhs_type))))
5355 {
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "type conversion to/from bit-precision unsupported."
5359 "\n");
5360 return false;
5361 }
5362
5363 if (op_type == binary_op)
5364 {
5365 gcc_assert (code == WIDEN_MULT_EXPR
5366 || code == WIDEN_LSHIFT_EXPR
5367 || widening_fn_p (code));
5368
5369 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5370 gimple_call_arg (stmt, 0);
5371 tree vectype1_in;
5372 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5373 &op1, &slp_op1, &dt[1], &vectype1_in))
5374 {
5375 if (dump_enabled_p ())
5376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5377 "use not simple.\n");
5378 return false;
5379 }
5380 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5381 OP1. */
5382 if (!vectype_in)
5383 vectype_in = vectype1_in;
5384 }
5385
5386 /* If op0 is an external or constant def, infer the vector type
5387 from the scalar type. */
5388 if (!vectype_in)
5389 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5390 if (vec_stmt)
5391 gcc_assert (vectype_in);
5392 if (!vectype_in)
5393 {
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5396 "no vectype for scalar type %T\n", rhs_type);
5397
5398 return false;
5399 }
5400
5401 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5402 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5403 {
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "can't convert between boolean and non "
5407 "boolean vectors %T\n", rhs_type);
5408
5409 return false;
5410 }
5411
5412 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5413 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5414 /* With equal element counts this is either a half-widening
5415 operation or a plain same-size conversion; a single assignment
5416 avoids an ambiguous dangling else against the chain below. */
5417 if (known_eq (nunits_out, nunits_in))
5418 modifier = widen_arith ? WIDEN : NONE;
5419 else if (multiple_p (nunits_out, nunits_in))
5420 modifier = NARROW_DST;
5421 else
5422 {
5423 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5424 modifier = WIDEN;
5425 }
5426
5427 /* Multiple types in SLP are handled by creating the appropriate number of
5428 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5429 case of SLP. */
5430 if (slp_node)
5431 ncopies = 1;
5432 else if (modifier == NARROW_DST)
5433 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5434 else
5435 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5436
5437 /* Sanity check: make sure that at least one copy of the vectorized stmt
5438 needs to be generated. */
5439 gcc_assert (ncopies >= 1);
5440
5441 bool found_mode = false;
5442 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5443 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5444 opt_scalar_mode rhs_mode_iter;
5445
5446 /* Supportable by target? */
5447 switch (modifier)
5448 {
5449 case NONE:
5450 if (code != FIX_TRUNC_EXPR
5451 && code != FLOAT_EXPR
5452 && !CONVERT_EXPR_CODE_P (code))
5453 return false;
5454 gcc_assert (code.is_tree_code ());
5455 if (supportable_convert_operation ((tree_code) code, vectype_out,
5456 vectype_in, &tc1))
5457 {
5458 code1 = tc1;
5459 break;
5460 }
5461
5462 /* For conversions between float and integer types try whether
5463 we can use intermediate signed integer types to support the
5464 conversion. */
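/* For example (the exact modes depend on the target), an int -> double
conversion may be done as int -> long -> double, and with
-fno-trapping-math a double -> int conversion as double -> long -> int. */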
5465 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5466 && (code == FLOAT_EXPR ||
5467 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5468 {
5469 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5470 bool float_expr_p = code == FLOAT_EXPR;
5471 unsigned short target_size;
5472 scalar_mode intermediate_mode;
5473 if (demotion)
5474 {
5475 intermediate_mode = lhs_mode;
5476 target_size = GET_MODE_SIZE (rhs_mode);
5477 }
5478 else
5479 {
5480 target_size = GET_MODE_SIZE (lhs_mode);
5481 if (!int_mode_for_size
5482 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5483 goto unsupported;
5484 }
5485 code1 = float_expr_p ? code : NOP_EXPR;
5486 codecvt1 = float_expr_p ? NOP_EXPR : code;
5487 opt_scalar_mode mode_iter;
5488 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5489 {
5490 intermediate_mode = mode_iter.require ();
5491
5492 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5493 break;
5494
5495 scalar_mode cvt_mode;
5496 if (!int_mode_for_size
5497 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5498 break;
5499
5500 cvt_type = build_nonstandard_integer_type
5501 (GET_MODE_BITSIZE (cvt_mode), 0);
5502
5503 /* Check if the intermediate type can hold OP0's range.
5504 When converting from float to integer this is not necessary
5505 because values that do not fit the (smaller) target type are
5506 unspecified anyway. */
5507 if (demotion && float_expr_p)
5508 {
5509 wide_int op_min_value, op_max_value;
5510 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5511 break;
5512
5513 if (cvt_type == NULL_TREE
5514 || (wi::min_precision (op_max_value, SIGNED)
5515 > TYPE_PRECISION (cvt_type))
5516 || (wi::min_precision (op_min_value, SIGNED)
5517 > TYPE_PRECISION (cvt_type)))
5518 continue;
5519 }
5520
5521 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5522 /* This should only happen for SLP, as long as the loop vectorizer
5523 only supports same-sized vectors. */
5524 if (cvt_type == NULL_TREE
5525 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5526 || !supportable_convert_operation ((tree_code) code1,
5527 vectype_out,
5528 cvt_type, &tc1)
5529 || !supportable_convert_operation ((tree_code) codecvt1,
5530 cvt_type,
5531 vectype_in, &tc2))
5532 continue;
5533
5534 found_mode = true;
5535 break;
5536 }
5537
5538 if (found_mode)
5539 {
5540 multi_step_cvt++;
5541 interm_types.safe_push (cvt_type);
5542 cvt_type = NULL_TREE;
5543 code1 = tc1;
5544 codecvt1 = tc2;
5545 break;
5546 }
5547 }
5548 /* FALLTHRU */
5549 unsupported:
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5552 "conversion not supported by target.\n");
5553 return false;
5554
5555 case WIDEN:
5556 if (known_eq (nunits_in, nunits_out))
5557 {
5558 if (!(code.is_tree_code ()
5559 && supportable_half_widening_operation ((tree_code) code,
5560 vectype_out, vectype_in,
5561 &tc1)))
5562 goto unsupported;
5563 code1 = tc1;
5564 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5565 break;
5566 }
5567 if (supportable_widening_operation (vinfo, code, stmt_info,
5568 vectype_out, vectype_in, &code1,
5569 &code2, &multi_step_cvt,
5570 &interm_types))
5571 {
5572 /* Binary widening operation can only be supported directly by the
5573 architecture. */
5574 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5575 break;
5576 }
5577
5578 if (code != FLOAT_EXPR
5579 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5580 goto unsupported;
5581
5582 fltsz = GET_MODE_SIZE (lhs_mode);
5583 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5584 {
5585 rhs_mode = rhs_mode_iter.require ();
5586 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5587 break;
5588
5589 cvt_type
5590 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5591 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5592 if (cvt_type == NULL_TREE)
5593 goto unsupported;
5594
5595 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5596 {
5597 tc1 = ERROR_MARK;
5598 gcc_assert (code.is_tree_code ());
5599 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5600 cvt_type, &tc1))
5601 goto unsupported;
5602 codecvt1 = tc1;
5603 }
5604 else if (!supportable_widening_operation (vinfo, code,
5605 stmt_info, vectype_out,
5606 cvt_type, &codecvt1,
5607 &codecvt2, &multi_step_cvt,
5608 &interm_types))
5609 continue;
5610 else
5611 gcc_assert (multi_step_cvt == 0);
5612
5613 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5614 cvt_type,
5615 vectype_in, &code1,
5616 &code2, &multi_step_cvt,
5617 &interm_types))
5618 {
5619 found_mode = true;
5620 break;
5621 }
5622 }
5623
5624 if (!found_mode)
5625 goto unsupported;
5626
5627 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5628 codecvt2 = ERROR_MARK;
5629 else
5630 {
5631 multi_step_cvt++;
5632 interm_types.safe_push (cvt_type);
5633 cvt_type = NULL_TREE;
5634 }
5635 break;
5636
5637 case NARROW_DST:
5638 gcc_assert (op_type == unary_op);
5639 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5640 &code1, &multi_step_cvt,
5641 &interm_types))
5642 break;
5643
5644 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5645 goto unsupported;
5646
5647 if (code == FIX_TRUNC_EXPR)
5648 {
5649 cvt_type
5650 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5651 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5652 if (cvt_type == NULL_TREE)
5653 goto unsupported;
5654 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5655 &tc1))
5656 codecvt1 = tc1;
5657 else
5658 goto unsupported;
5659 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5660 &code1, &multi_step_cvt,
5661 &interm_types))
5662 break;
5663 }
5664 /* If op0 can be represented with a lower-precision integer,
5665 truncate it to cvt_type and then do the FLOAT_EXPR. */
5666 else if (code == FLOAT_EXPR)
5667 {
5668 wide_int op_min_value, op_max_value;
5669 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5670 goto unsupported;
5671
5672 cvt_type
5673 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5674 if (cvt_type == NULL_TREE
5675 || (wi::min_precision (op_max_value, SIGNED)
5676 > TYPE_PRECISION (cvt_type))
5677 || (wi::min_precision (op_min_value, SIGNED)
5678 > TYPE_PRECISION (cvt_type)))
5679 goto unsupported;
5680
5681 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5682 if (cvt_type == NULL_TREE)
5683 goto unsupported;
5684 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5685 &code1, &multi_step_cvt,
5686 &interm_types))
5687 goto unsupported;
5688 if (supportable_convert_operation ((tree_code) code, vectype_out,
5689 cvt_type, &tc1))
5690 {
5691 codecvt1 = tc1;
5692 modifier = NARROW_SRC;
5693 break;
5694 }
5695 }
5696
5697 goto unsupported;
5698
5699 default:
5700 gcc_unreachable ();
5701 }
5702
5703 if (!vec_stmt) /* transformation not required. */
5704 {
5705 if (slp_node
5706 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5707 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5708 {
5709 if (dump_enabled_p ())
5710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5711 "incompatible vector types for invariants\n");
5712 return false;
5713 }
5714 DUMP_VECT_SCOPE ("vectorizable_conversion");
5715 if (modifier == NONE)
5716 {
5717 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5718 vect_model_simple_cost (vinfo, stmt_info,
5719 ncopies * (1 + multi_step_cvt),
5720 dt, ndts, slp_node, cost_vec);
5721 }
5722 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5723 {
5724 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5725 /* The final packing step produces one vector result per copy. */
5726 unsigned int nvectors
5727 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5728 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5729 multi_step_cvt, cost_vec,
5730 widen_arith);
5731 }
5732 else
5733 {
5734 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5735 /* The initial unpacking step produces two vector results
5736 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5737 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5738 unsigned int nvectors
5739 = (slp_node
5740 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5741 : ncopies * 2);
5742 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5743 multi_step_cvt, cost_vec,
5744 widen_arith);
5745 }
5746 interm_types.release ();
5747 return true;
5748 }
5749
5750 /* Transform. */
5751 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE, vect_location,
5753 "transform conversion. ncopies = %d.\n", ncopies);
5754
5755 if (op_type == binary_op)
5756 {
5757 if (CONSTANT_CLASS_P (op0))
5758 op0 = fold_convert (TREE_TYPE (op1), op0);
5759 else if (CONSTANT_CLASS_P (op1))
5760 op1 = fold_convert (TREE_TYPE (op0), op1);
5761 }
5762
5763 /* In case of multi-step conversion, we first generate conversion operations
5764 to the intermediate types, and then from those types to the final one.
5765 We create vector destinations for the intermediate types (TYPES) received
5766 from supportable_*_operation, and store them in the correct order
5767 for future use in vect_create_vectorized_*_stmts (). */
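/* VEC_DSTS will hold one destination per intermediate type plus one for
the final (or conversion) vector type; e.g. a single-step intermediate
conversion yields two entries. */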
5768 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5769 bool widen_or_narrow_float_p
5770 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5771 vec_dest = vect_create_destination_var (scalar_dest,
5772 widen_or_narrow_float_p
5773 ? cvt_type : vectype_out);
5774 vec_dsts.quick_push (vec_dest);
5775
5776 if (multi_step_cvt)
5777 {
5778 for (i = interm_types.length () - 1;
5779 interm_types.iterate (i, &intermediate_type); i--)
5780 {
5781 vec_dest = vect_create_destination_var (scalar_dest,
5782 intermediate_type);
5783 vec_dsts.quick_push (vec_dest);
5784 }
5785 }
5786
5787 if (cvt_type)
5788 vec_dest = vect_create_destination_var (scalar_dest,
5789 widen_or_narrow_float_p
5790 ? vectype_out : cvt_type);
5791
5792 int ninputs = 1;
5793 if (!slp_node)
5794 {
5795 if (modifier == WIDEN)
5796 ;
5797 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5798 {
5799 if (multi_step_cvt)
5800 ninputs = vect_pow2 (multi_step_cvt);
5801 ninputs *= 2;
5802 }
5803 }
5804
5805 switch (modifier)
5806 {
5807 case NONE:
5808 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5809 op0, &vec_oprnds0);
5810 /* When multi_step_cvt, vec_dest holds the intermediate-type destination. */
5811 if (multi_step_cvt)
5812 {
5813 cvt_op = vec_dest;
5814 vec_dest = vec_dsts[0];
5815 }
5816
5817 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5818 {
5819 /* Arguments are ready, create the new vector stmt. */
5820 gimple* new_stmt;
5821 if (multi_step_cvt)
5822 {
5823 gcc_assert (multi_step_cvt == 1);
5824 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5825 new_temp = make_ssa_name (cvt_op, new_stmt);
5826 gimple_assign_set_lhs (new_stmt, new_temp);
5827 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5828 vop0 = new_temp;
5829 }
5830 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5831 new_temp = make_ssa_name (vec_dest, new_stmt);
5832 gimple_set_lhs (new_stmt, new_temp);
5833 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5834
5835 if (slp_node)
5836 slp_node->push_vec_def (new_stmt);
5837 else
5838 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5839 }
5840 break;
5841
5842 case WIDEN:
5843 /* In case the vectorization factor (VF) is bigger than the number
5844 of elements that we can fit in a vectype (nunits), we have to
5845 generate more than one vector stmt, i.e., we need to "unroll"
5846 the vector stmt by a factor VF/nunits. */
5847 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5848 op0, &vec_oprnds0,
5849 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5850 &vec_oprnds1);
5851 if (code == WIDEN_LSHIFT_EXPR)
5852 {
5853 int oprnds_size = vec_oprnds0.length ();
5854 vec_oprnds1.create (oprnds_size);
5855 for (i = 0; i < oprnds_size; ++i)
5856 vec_oprnds1.quick_push (op1);
5857 }
5858 /* Arguments are ready. Create the new vector stmts. */
5859 for (i = multi_step_cvt; i >= 0; i--)
5860 {
5861 tree this_dest = vec_dsts[i];
5862 code_helper c1 = code1, c2 = code2;
5863 if (i == 0 && codecvt2 != ERROR_MARK)
5864 {
5865 c1 = codecvt1;
5866 c2 = codecvt2;
5867 }
5868 if (known_eq (nunits_out, nunits_in))
5869 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5870 stmt_info, this_dest, gsi, c1,
5871 op_type);
5872 else
5873 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5874 &vec_oprnds1, stmt_info,
5875 this_dest, gsi,
5876 c1, c2, op_type);
5877 }
5878
5879 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5880 {
5881 gimple *new_stmt;
5882 if (cvt_type)
5883 {
5884 new_temp = make_ssa_name (vec_dest);
5885 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5886 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5887 }
5888 else
5889 new_stmt = SSA_NAME_DEF_STMT (vop0);
5890
5891 if (slp_node)
5892 slp_node->push_vec_def (new_stmt);
5893 else
5894 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5895 }
5896 break;
5897
5898 case NARROW_SRC:
5899 case NARROW_DST:
5900 /* In case the vectorization factor (VF) is bigger than the number
5901 of elements that we can fit in a vectype (nunits), we have to
5902 generate more than one vector stmt, i.e., we need to "unroll"
5903 the vector stmt by a factor VF/nunits. */
5904 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5905 op0, &vec_oprnds0);
5906 /* Arguments are ready. Create the new vector stmts. */
5907 if (cvt_type && modifier == NARROW_DST)
5908 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5909 {
5910 new_temp = make_ssa_name (vec_dest);
5911 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5912 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5913 vec_oprnds0[i] = new_temp;
5914 }
5915
5916 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5917 multi_step_cvt,
5918 stmt_info, vec_dsts, gsi,
5919 slp_node, code1,
5920 modifier == NARROW_SRC);
5921 /* After demoting op0 to cvt_type, convert it to dest. */
5922 if (cvt_type && code == FLOAT_EXPR)
5923 {
5924 for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
5925 {
5926 /* Arguments are ready, create the new vector stmt. */
5927 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5928 gimple *new_stmt
5929 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5930 new_temp = make_ssa_name (vec_dest, new_stmt);
5931 gimple_set_lhs (new_stmt, new_temp);
5932 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5933
5934 /* This is the last step of the conversion sequence. Store the
5935 vectors in SLP_NODE or in vector info of the scalar statement
5936 (or in STMT_VINFO_RELATED_STMT chain). */
5937 if (slp_node)
5938 slp_node->push_vec_def (new_stmt);
5939 else
5940 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5941 }
5942 }
5943 break;
5944 }
5945 if (!slp_node)
5946 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5947
5948 vec_oprnds0.release ();
5949 vec_oprnds1.release ();
5950 interm_types.release ();
5951
5952 return true;
5953 }
5954
5955 /* Return true if we can assume from the scalar form of STMT_INFO that
5956 neither the scalar nor the vector forms will generate code. STMT_INFO
5957 is known not to involve a data reference. */
5958
5959 bool
5960 vect_nop_conversion_p (stmt_vec_info stmt_info)
5961 {
5962 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5963 if (!stmt)
5964 return false;
5965
5966 tree lhs = gimple_assign_lhs (stmt);
5967 tree_code code = gimple_assign_rhs_code (stmt);
5968 tree rhs = gimple_assign_rhs1 (stmt);
5969
5970 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5971 return true;
5972
5973 if (CONVERT_EXPR_CODE_P (code))
5974 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5975
5976 return false;
5977 }
5978
5979 /* Function vectorizable_assignment.
5980
5981 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5982 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5983 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5984 Return true if STMT_INFO is vectorizable in this way. */
5985
5986 static bool
5987 vectorizable_assignment (vec_info *vinfo,
5988 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5989 gimple **vec_stmt, slp_tree slp_node,
5990 stmt_vector_for_cost *cost_vec)
5991 {
5992 tree vec_dest;
5993 tree scalar_dest;
5994 tree op;
5995 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5996 tree new_temp;
5997 enum vect_def_type dt[1] = {vect_unknown_def_type};
5998 int ndts = 1;
5999 int ncopies;
6000 int i;
6001 vec<tree> vec_oprnds = vNULL;
6002 tree vop;
6003 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6004 enum tree_code code;
6005 tree vectype_in;
6006
6007 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6008 return false;
6009
6010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6011 && ! vec_stmt)
6012 return false;
6013
6014 /* Is vectorizable assignment? */
6015 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6016 if (!stmt)
6017 return false;
6018
6019 scalar_dest = gimple_assign_lhs (stmt);
6020 if (TREE_CODE (scalar_dest) != SSA_NAME)
6021 return false;
6022
6023 if (STMT_VINFO_DATA_REF (stmt_info))
6024 return false;
6025
6026 code = gimple_assign_rhs_code (stmt);
6027 if (!(gimple_assign_single_p (stmt)
6028 || code == PAREN_EXPR
6029 || CONVERT_EXPR_CODE_P (code)))
6030 return false;
6031
6032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6033 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6034
6035 /* Multiple types in SLP are handled by creating the appropriate number of
6036 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6037 case of SLP. */
6038 if (slp_node)
6039 ncopies = 1;
6040 else
6041 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6042
6043 gcc_assert (ncopies >= 1);
6044
6045 slp_tree slp_op;
6046 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6047 &dt[0], &vectype_in))
6048 {
6049 if (dump_enabled_p ())
6050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6051 "use not simple.\n");
6052 return false;
6053 }
6054 if (!vectype_in)
6055 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6056
6057 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
6058 change the number of elements or the vector size. */
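/* E.g. a cast between int and unsigned int (same mode, same number of
elements) is handled here as a plain VIEW_CONVERT_EXPR of the operand. */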
6059 if ((CONVERT_EXPR_CODE_P (code)
6060 || code == VIEW_CONVERT_EXPR)
6061 && (!vectype_in
6062 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6063 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6064 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6065 return false;
6066
6067 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6068 {
6069 if (dump_enabled_p ())
6070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6071 "can't convert between boolean and non "
6072 "boolean vectors %T\n", TREE_TYPE (op));
6073
6074 return false;
6075 }
6076
6077 /* We do not handle bit-precision changes. */
6078 if ((CONVERT_EXPR_CODE_P (code)
6079 || code == VIEW_CONVERT_EXPR)
6080 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6081 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6082 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6083 && !type_has_mode_precision_p (TREE_TYPE (op))))
6084 /* But a conversion that does not change the bit-pattern is ok. */
6085 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6086 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6087 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6088 > TYPE_PRECISION (TREE_TYPE (op)))
6089 && TYPE_UNSIGNED (TREE_TYPE (op))))
6090 {
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6093 "type conversion to/from bit-precision "
6094 "unsupported.\n");
6095 return false;
6096 }
6097
6098 if (!vec_stmt) /* transformation not required. */
6099 {
6100 if (slp_node
6101 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6102 {
6103 if (dump_enabled_p ())
6104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6105 "incompatible vector types for invariants\n");
6106 return false;
6107 }
6108 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6109 DUMP_VECT_SCOPE ("vectorizable_assignment");
6110 if (!vect_nop_conversion_p (stmt_info))
6111 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6112 cost_vec);
6113 return true;
6114 }
6115
6116 /* Transform. */
6117 if (dump_enabled_p ())
6118 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6119
6120 /* Handle def. */
6121 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6122
6123 /* Handle use. */
6124 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6125
6126 /* Arguments are ready. Create the new vector stmt. */
6127 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6128 {
6129 if (CONVERT_EXPR_CODE_P (code)
6130 || code == VIEW_CONVERT_EXPR)
6131 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6132 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6133 new_temp = make_ssa_name (vec_dest, new_stmt);
6134 gimple_assign_set_lhs (new_stmt, new_temp);
6135 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6136 if (slp_node)
6137 slp_node->push_vec_def (new_stmt);
6138 else
6139 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6140 }
6141 if (!slp_node)
6142 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6143
6144 vec_oprnds.release ();
6145 return true;
6146 }
6147
6148
6149 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6150 either as shift by a scalar or by a vector. */
6151
6152 bool
6153 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6154 {
6155
6156 machine_mode vec_mode;
6157 optab optab;
6158 int icode;
6159 tree vectype;
6160
6161 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6162 if (!vectype)
6163 return false;
6164
6165 optab = optab_for_tree_code (code, vectype, optab_scalar);
6166 if (!optab
6167 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6168 {
6169 optab = optab_for_tree_code (code, vectype, optab_vector);
6170 if (!optab
6171 || (optab_handler (optab, TYPE_MODE (vectype))
6172 == CODE_FOR_nothing))
6173 return false;
6174 }
6175
6176 vec_mode = TYPE_MODE (vectype);
6177 icode = (int) optab_handler (optab, vec_mode);
6178 if (icode == CODE_FOR_nothing)
6179 return false;
6180
6181 return true;
6182 }
6183
6184
6185 /* Function vectorizable_shift.
6186
6187 Check if STMT_INFO performs a shift operation that can be vectorized.
6188 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6190 Return true if STMT_INFO is vectorizable in this way. */
6191
6192 static bool
6193 vectorizable_shift (vec_info *vinfo,
6194 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6195 gimple **vec_stmt, slp_tree slp_node,
6196 stmt_vector_for_cost *cost_vec)
6197 {
6198 tree vec_dest;
6199 tree scalar_dest;
6200 tree op0, op1 = NULL;
6201 tree vec_oprnd1 = NULL_TREE;
6202 tree vectype;
6203 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6204 enum tree_code code;
6205 machine_mode vec_mode;
6206 tree new_temp;
6207 optab optab;
6208 int icode;
6209 machine_mode optab_op2_mode;
6210 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6211 int ndts = 2;
6212 poly_uint64 nunits_in;
6213 poly_uint64 nunits_out;
6214 tree vectype_out;
6215 tree op1_vectype;
6216 int ncopies;
6217 int i;
6218 vec<tree> vec_oprnds0 = vNULL;
6219 vec<tree> vec_oprnds1 = vNULL;
6220 tree vop0, vop1;
6221 unsigned int k;
6222 bool scalar_shift_arg = true;
6223 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6224 bool incompatible_op1_vectype_p = false;
6225
6226 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6227 return false;
6228
6229 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6230 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6231 && ! vec_stmt)
6232 return false;
6233
6234 /* Is STMT a vectorizable binary/unary operation? */
6235 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6236 if (!stmt)
6237 return false;
6238
6239 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6240 return false;
6241
6242 code = gimple_assign_rhs_code (stmt);
6243
6244 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6245 || code == RROTATE_EXPR))
6246 return false;
6247
6248 scalar_dest = gimple_assign_lhs (stmt);
6249 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6250 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6251 {
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6254 "bit-precision shifts not supported.\n");
6255 return false;
6256 }
6257
6258 slp_tree slp_op0;
6259 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6260 0, &op0, &slp_op0, &dt[0], &vectype))
6261 {
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6264 "use not simple.\n");
6265 return false;
6266 }
6267 /* If op0 is an external or constant def, infer the vector type
6268 from the scalar type. */
6269 if (!vectype)
6270 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6271 if (vec_stmt)
6272 gcc_assert (vectype);
6273 if (!vectype)
6274 {
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6277 "no vectype for scalar type\n");
6278 return false;
6279 }
6280
6281 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6282 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6283 if (maybe_ne (nunits_out, nunits_in))
6284 return false;
6285
6286 stmt_vec_info op1_def_stmt_info;
6287 slp_tree slp_op1;
6288 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6289 &dt[1], &op1_vectype, &op1_def_stmt_info))
6290 {
6291 if (dump_enabled_p ())
6292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6293 "use not simple.\n");
6294 return false;
6295 }
6296
6297 /* Multiple types in SLP are handled by creating the appropriate number of
6298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6299 case of SLP. */
6300 if (slp_node)
6301 ncopies = 1;
6302 else
6303 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6304
6305 gcc_assert (ncopies >= 1);
6306
6307 /* Determine whether the shift amount is a vector or a scalar. If the
6308 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6309
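/* E.g. x[i] << 3 or x[i] << n with loop-invariant n can use the
vector-by-scalar form, whereas x[i] << y[i] needs the vector-by-vector
form. */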
6310 if ((dt[1] == vect_internal_def
6311 || dt[1] == vect_induction_def
6312 || dt[1] == vect_nested_cycle)
6313 && !slp_node)
6314 scalar_shift_arg = false;
6315 else if (dt[1] == vect_constant_def
6316 || dt[1] == vect_external_def
6317 || dt[1] == vect_internal_def)
6318 {
6319 /* In SLP we need to check whether the shift count is the same in
6320 all statements; in loops, if it is a constant or invariant, it is
6321 always a scalar shift. */
6322 if (slp_node)
6323 {
6324 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6325 stmt_vec_info slpstmt_info;
6326
6327 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6328 {
6329 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6330 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6331 scalar_shift_arg = false;
6332 }
6333
6334 /* For internal SLP defs we have to make sure we see scalar stmts
6335 for all vector elements.
6336 ??? For different vectors we could resort to a different
6337 scalar shift operand but code-generation below simply always
6338 takes the first. */
6339 if (dt[1] == vect_internal_def
6340 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6341 stmts.length ()))
6342 scalar_shift_arg = false;
6343 }
6344
6345 /* If the shift amount is computed by a pattern stmt we cannot
6346 use the scalar amount directly thus give up and use a vector
6347 shift. */
6348 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6349 scalar_shift_arg = false;
6350 }
6351 else
6352 {
6353 if (dump_enabled_p ())
6354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6355 "operand mode requires invariant argument.\n");
6356 return false;
6357 }
6358
6359 /* Vector shifted by vector. */
6360 bool was_scalar_shift_arg = scalar_shift_arg;
6361 if (!scalar_shift_arg)
6362 {
6363 optab = optab_for_tree_code (code, vectype, optab_vector);
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_NOTE, vect_location,
6366 "vector/vector shift/rotate found.\n");
6367
6368 if (!op1_vectype)
6369 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6370 slp_op1);
6371 incompatible_op1_vectype_p
6372 = (op1_vectype == NULL_TREE
6373 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6374 TYPE_VECTOR_SUBPARTS (vectype))
6375 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6376 if (incompatible_op1_vectype_p
6377 && (!slp_node
6378 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6379 || slp_op1->refcnt != 1))
6380 {
6381 if (dump_enabled_p ())
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6383 "unusable type for last operand in"
6384 " vector/vector shift/rotate.\n");
6385 return false;
6386 }
6387 }
6388 /* See if the machine has a vector-shift-by-scalar insn, and if not
6389 then see if it has a vector-shift-by-vector insn. */
6390 else
6391 {
6392 optab = optab_for_tree_code (code, vectype, optab_scalar);
6393 if (optab
6394 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6395 {
6396 if (dump_enabled_p ())
6397 dump_printf_loc (MSG_NOTE, vect_location,
6398 "vector/scalar shift/rotate found.\n");
6399 }
6400 else
6401 {
6402 optab = optab_for_tree_code (code, vectype, optab_vector);
6403 if (optab
6404 && (optab_handler (optab, TYPE_MODE (vectype))
6405 != CODE_FOR_nothing))
6406 {
6407 scalar_shift_arg = false;
6408
6409 if (dump_enabled_p ())
6410 dump_printf_loc (MSG_NOTE, vect_location,
6411 "vector/vector shift/rotate found.\n");
6412
6413 if (!op1_vectype)
6414 op1_vectype = get_vectype_for_scalar_type (vinfo,
6415 TREE_TYPE (op1),
6416 slp_op1);
6417
6418 /* Unlike the other binary operators, shifts/rotates can have an rhs
6419 of type int instead of the same type as the lhs, so make sure the
6420 scalar has the right type when we are dealing with vectors of
6421 long long/long/short/char. */
6422 incompatible_op1_vectype_p
6423 = (!op1_vectype
6424 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6425 TREE_TYPE (op1)));
6426 if (incompatible_op1_vectype_p
6427 && dt[1] == vect_internal_def)
6428 {
6429 if (dump_enabled_p ())
6430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6431 "unusable type for last operand in"
6432 " vector/vector shift/rotate.\n");
6433 return false;
6434 }
6435 }
6436 }
6437 }
6438
6439 /* Supportable by target? */
6440 if (!optab)
6441 {
6442 if (dump_enabled_p ())
6443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6444 "no optab.\n");
6445 return false;
6446 }
6447 vec_mode = TYPE_MODE (vectype);
6448 icode = (int) optab_handler (optab, vec_mode);
6449 if (icode == CODE_FOR_nothing)
6450 {
6451 if (dump_enabled_p ())
6452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6453 "op not supported by target.\n");
6454 return false;
6455 }
6456 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6457 if (vect_emulated_vector_p (vectype))
6458 return false;
6459
6460 if (!vec_stmt) /* transformation not required. */
6461 {
6462 if (slp_node
6463 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6464 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6465 && (!incompatible_op1_vectype_p
6466 || dt[1] == vect_constant_def)
6467 && !vect_maybe_update_slp_op_vectype
6468 (slp_op1,
6469 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6470 {
6471 if (dump_enabled_p ())
6472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6473 "incompatible vector types for invariants\n");
6474 return false;
6475 }
6476 /* Now adjust the constant shift amount in place. */
6477 if (slp_node
6478 && incompatible_op1_vectype_p
6479 && dt[1] == vect_constant_def)
6480 {
6481 for (unsigned i = 0;
6482 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6483 {
6484 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6485 = fold_convert (TREE_TYPE (vectype),
6486 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6487 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6488 == INTEGER_CST));
6489 }
6490 }
6491 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6492 DUMP_VECT_SCOPE ("vectorizable_shift");
6493 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6494 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6495 return true;
6496 }
6497
6498 /* Transform. */
6499
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_NOTE, vect_location,
6502 "transform binary/unary operation.\n");
6503
6504 if (incompatible_op1_vectype_p && !slp_node)
6505 {
6506 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6507 op1 = fold_convert (TREE_TYPE (vectype), op1);
6508 if (dt[1] != vect_constant_def)
6509 op1 = vect_init_vector (vinfo, stmt_info, op1,
6510 TREE_TYPE (vectype), NULL);
6511 }
6512
6513 /* Handle def. */
6514 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6515
6516 if (scalar_shift_arg && dt[1] != vect_internal_def)
6517 {
6518 /* Vector shl and shr insn patterns can be defined with scalar
6519 operand 2 (shift operand). In this case, use constant or loop
6520 invariant op1 directly, without extending it to vector mode
6521 first. */
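/* The insn_data check below looks at the mode of operand 2 of the shift
pattern; if it is not a vector mode, the scalar count is reused as-is
for every copy. */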
6522 optab_op2_mode = insn_data[icode].operand[2].mode;
6523 if (!VECTOR_MODE_P (optab_op2_mode))
6524 {
6525 if (dump_enabled_p ())
6526 dump_printf_loc (MSG_NOTE, vect_location,
6527 "operand 1 using scalar mode.\n");
6528 vec_oprnd1 = op1;
6529 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6530 vec_oprnds1.quick_push (vec_oprnd1);
6531 /* Store vec_oprnd1 for every vector stmt to be created.
6532 We check during the analysis that all the shift arguments
6533 are the same.
6534 TODO: Allow different constants for different vector
6535 stmts generated for an SLP instance. */
6536 for (k = 0;
6537 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6538 vec_oprnds1.quick_push (vec_oprnd1);
6539 }
6540 }
6541 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6542 {
6543 if (was_scalar_shift_arg)
6544 {
6545 /* If the argument was the same in all lanes create
6546 the correctly typed vector shift amount directly. */
6547 op1 = fold_convert (TREE_TYPE (vectype), op1);
6548 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6549 !loop_vinfo ? gsi : NULL);
6550 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6551 !loop_vinfo ? gsi : NULL);
6552 vec_oprnds1.create (slp_node->vec_stmts_size);
6553 for (k = 0; k < slp_node->vec_stmts_size; k++)
6554 vec_oprnds1.quick_push (vec_oprnd1);
6555 }
6556 else if (dt[1] == vect_constant_def)
6557 /* The constant shift amount has been adjusted in place. */
6558 ;
6559 else
6560 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6561 }
6562
6563 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6564 (a special case for certain kinds of vector shifts); otherwise,
6565 operand 1 should be of a vector type (the usual case). */
6566 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6567 op0, &vec_oprnds0,
6568 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6569
6570 /* Arguments are ready. Create the new vector stmt. */
6571 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6572 {
6573 /* For internal defs where we need to use a scalar shift arg
6574 extract the first lane. */
6575 if (scalar_shift_arg && dt[1] == vect_internal_def)
6576 {
6577 vop1 = vec_oprnds1[0];
6578 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6579 gassign *new_stmt
6580 = gimple_build_assign (new_temp,
6581 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6582 vop1,
6583 TYPE_SIZE (TREE_TYPE (new_temp)),
6584 bitsize_zero_node));
6585 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6586 vop1 = new_temp;
6587 }
6588 else
6589 vop1 = vec_oprnds1[i];
6590 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6591 new_temp = make_ssa_name (vec_dest, new_stmt);
6592 gimple_assign_set_lhs (new_stmt, new_temp);
6593 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6594 if (slp_node)
6595 slp_node->push_vec_def (new_stmt);
6596 else
6597 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6598 }
6599
6600 if (!slp_node)
6601 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6602
6603 vec_oprnds0.release ();
6604 vec_oprnds1.release ();
6605
6606 return true;
6607 }
6608
6609 /* Function vectorizable_operation.
6610
6611 Check if STMT_INFO performs a binary, unary or ternary operation that can
6612 be vectorized.
6613 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6614 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6615 Return true if STMT_INFO is vectorizable in this way. */
6616
6617 static bool
6618 vectorizable_operation (vec_info *vinfo,
6619 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6620 gimple **vec_stmt, slp_tree slp_node,
6621 stmt_vector_for_cost *cost_vec)
6622 {
6623 tree vec_dest;
6624 tree scalar_dest;
6625 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6626 tree vectype;
6627 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6628 enum tree_code code, orig_code;
6629 machine_mode vec_mode;
6630 tree new_temp;
6631 int op_type;
6632 optab optab;
6633 bool target_support_p;
6634 enum vect_def_type dt[3]
6635 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6636 int ndts = 3;
6637 poly_uint64 nunits_in;
6638 poly_uint64 nunits_out;
6639 tree vectype_out;
6640 int ncopies, vec_num;
6641 int i;
6642 vec<tree> vec_oprnds0 = vNULL;
6643 vec<tree> vec_oprnds1 = vNULL;
6644 vec<tree> vec_oprnds2 = vNULL;
6645 tree vop0, vop1, vop2;
6646 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6647
6648 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6649 return false;
6650
6651 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6652 && ! vec_stmt)
6653 return false;
6654
6655 /* Is STMT a vectorizable binary/unary operation? */
6656 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6657 if (!stmt)
6658 return false;
6659
6660 /* Loads and stores are handled in vectorizable_{load,store}. */
6661 if (STMT_VINFO_DATA_REF (stmt_info))
6662 return false;
6663
6664 orig_code = code = gimple_assign_rhs_code (stmt);
6665
6666 /* Shifts are handled in vectorizable_shift. */
6667 if (code == LSHIFT_EXPR
6668 || code == RSHIFT_EXPR
6669 || code == LROTATE_EXPR
6670 || code == RROTATE_EXPR)
6671 return false;
6672
6673 /* Comparisons are handled in vectorizable_comparison. */
6674 if (TREE_CODE_CLASS (code) == tcc_comparison)
6675 return false;
6676
6677 /* Conditions are handled in vectorizable_condition. */
6678 if (code == COND_EXPR)
6679 return false;
6680
6681 /* For pointer addition and subtraction, we should use the normal
6682 plus and minus for the vector operation. */
6683 if (code == POINTER_PLUS_EXPR)
6684 code = PLUS_EXPR;
6685 if (code == POINTER_DIFF_EXPR)
6686 code = MINUS_EXPR;
6687
6688 /* Support only unary, binary and ternary operations. */
6689 op_type = TREE_CODE_LENGTH (code);
6690 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6691 {
6692 if (dump_enabled_p ())
6693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6694 "num. args = %d (not unary/binary/ternary op).\n",
6695 op_type);
6696 return false;
6697 }
6698
6699 scalar_dest = gimple_assign_lhs (stmt);
6700 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6701
6702 /* Most operations cannot handle bit-precision types without extra
6703 truncations. */
6704 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6705 if (!mask_op_p
6706 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6707 /* Exceptions are bitwise binary operations. */
6708 && code != BIT_IOR_EXPR
6709 && code != BIT_XOR_EXPR
6710 && code != BIT_AND_EXPR)
6711 {
6712 if (dump_enabled_p ())
6713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6714 "bit-precision arithmetic not supported.\n");
6715 return false;
6716 }
6717
6718 slp_tree slp_op0;
6719 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6720 0, &op0, &slp_op0, &dt[0], &vectype))
6721 {
6722 if (dump_enabled_p ())
6723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6724 "use not simple.\n");
6725 return false;
6726 }
6727 bool is_invariant = (dt[0] == vect_external_def
6728 || dt[0] == vect_constant_def);
6729 /* If op0 is an external or constant def, infer the vector type
6730 from the scalar type. */
6731 if (!vectype)
6732 {
6733 /* For a boolean type we cannot determine the vectype from an
6734 invariant value (we don't know whether it is a vector
6735 of booleans or a vector of integers). We use the output
6736 vectype because operations on booleans don't change the
6737 type. */
6738 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6739 {
6740 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6741 {
6742 if (dump_enabled_p ())
6743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6744 "not supported operation on bool value.\n");
6745 return false;
6746 }
6747 vectype = vectype_out;
6748 }
6749 else
6750 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6751 slp_node);
6752 }
6753 if (vec_stmt)
6754 gcc_assert (vectype);
6755 if (!vectype)
6756 {
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6759 "no vectype for scalar type %T\n",
6760 TREE_TYPE (op0));
6761
6762 return false;
6763 }
6764
6765 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6766 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6767 if (maybe_ne (nunits_out, nunits_in))
6768 return false;
6769
6770 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6771 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6772 if (op_type == binary_op || op_type == ternary_op)
6773 {
6774 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6775 1, &op1, &slp_op1, &dt[1], &vectype2))
6776 {
6777 if (dump_enabled_p ())
6778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6779 "use not simple.\n");
6780 return false;
6781 }
6782 is_invariant &= (dt[1] == vect_external_def
6783 || dt[1] == vect_constant_def);
6784 if (vectype2
6785 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6786 return false;
6787 }
6788 if (op_type == ternary_op)
6789 {
6790 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6791 2, &op2, &slp_op2, &dt[2], &vectype3))
6792 {
6793 if (dump_enabled_p ())
6794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6795 "use not simple.\n");
6796 return false;
6797 }
6798 is_invariant &= (dt[2] == vect_external_def
6799 || dt[2] == vect_constant_def);
6800 if (vectype3
6801 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6802 return false;
6803 }
6804
6805 /* Multiple types in SLP are handled by creating the appropriate number of
6806 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6807 case of SLP. */
6808 if (slp_node)
6809 {
6810 ncopies = 1;
6811 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6812 }
6813 else
6814 {
6815 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6816 vec_num = 1;
6817 }
6818
6819 gcc_assert (ncopies >= 1);
6820
6821 /* Reject attempts to combine mask types with nonmask types, e.g. if
6822 we have an AND between a (nonmask) boolean loaded from memory and
6823 a (mask) boolean result of a comparison.
6824
6825 TODO: We could easily fix these cases up using pattern statements. */
6826 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6827 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6828 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6829 {
6830 if (dump_enabled_p ())
6831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6832 "mixed mask and nonmask vector types\n");
6833 return false;
6834 }
6835
6836 /* Supportable by target? */
6837
6838 vec_mode = TYPE_MODE (vectype);
6839 if (code == MULT_HIGHPART_EXPR)
6840 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6841 else
6842 {
6843 optab = optab_for_tree_code (code, vectype, optab_default);
6844 if (!optab)
6845 {
6846 if (dump_enabled_p ())
6847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6848 "no optab.\n");
6849 return false;
6850 }
6851 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6852 || optab_libfunc (optab, vec_mode));
6853 }
6854
6855 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6856 if (!target_support_p || using_emulated_vectors_p)
6857 {
6858 if (dump_enabled_p ())
6859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6860 "op not supported by target.\n");
6861 /* When vec_mode is not a vector mode and we have verified that
6862 the ops we do not have to lower (like AND) are natively
6863 supported, let those through even when the mode isn't
6864 word_mode. For ops we do have to lower, the lowering code
6865 assumes we are dealing with word_mode. */
6866 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6867 || !target_support_p)
6868 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6869 /* Check only during analysis. */
6870 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6871 {
6872 if (dump_enabled_p ())
6873 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6874 return false;
6875 }
6876 if (dump_enabled_p ())
6877 dump_printf_loc (MSG_NOTE, vect_location,
6878 "proceeding using word mode.\n");
6879 using_emulated_vectors_p = true;
6880 }
6881
6882 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6883 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6884 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6885 internal_fn cond_fn = get_conditional_internal_fn (code);
6886 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6887
6888 /* If operating on inactive elements could generate spurious traps,
6889 we need to restrict the operation to active lanes. Note that this
6890 specifically doesn't apply to unhoisted invariants, since they
6891 operate on the same value for every lane.
6892
6893 Similarly, if this operation is part of a reduction, a fully-masked
6894 loop should only change the active lanes of the reduction chain,
6895 keeping the inactive lanes as-is. */
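  /* For instance (illustrative, hypothetical GIMPLE): a division
       t_6 = a_4 / b_5;
     in a partially-vectorized loop could trap on an inactive lane whose
     divisor element happens to be zero, so gimple_could_trap_p forces
     masking here; an operation whose operands are all invariant computes
     the same value in every lane and needs no such protection.  */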
6896 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6897 || reduc_idx >= 0);
6898
6899 if (!vec_stmt) /* transformation not required. */
6900 {
6901 if (loop_vinfo
6902 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6903 && mask_out_inactive)
6904 {
6905 if (cond_len_fn != IFN_LAST
6906 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6907 OPTIMIZE_FOR_SPEED))
6908 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6909 1);
6910 else if (cond_fn != IFN_LAST
6911 && direct_internal_fn_supported_p (cond_fn, vectype,
6912 OPTIMIZE_FOR_SPEED))
6913 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6914 vectype, NULL);
6915 else
6916 {
6917 if (dump_enabled_p ())
6918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6919 "can't use a fully-masked loop because no"
6920 " conditional operation is available.\n");
6921 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6922 }
6923 }
6924
6925 /* Put types on constant and invariant SLP children. */
6926 if (slp_node
6927 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6928 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6929 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6930 {
6931 if (dump_enabled_p ())
6932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6933 "incompatible vector types for invariants\n");
6934 return false;
6935 }
6936
6937 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6938 DUMP_VECT_SCOPE ("vectorizable_operation");
6939 vect_model_simple_cost (vinfo, stmt_info,
6940 ncopies, dt, ndts, slp_node, cost_vec);
6941 if (using_emulated_vectors_p)
6942 {
6943 /* The above vect_model_simple_cost call handles constants
6944 in the prologue and (mis-)costs one of the stmts as
6945 vector stmt. See below for the actual lowering that will
6946 be applied. */
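	  /* Rough illustration of the accounting below: with ncopies == 2
	     and code == PLUS_EXPR we add n = 2 * 5 = 10 word-mode scalar
	     stmts to the body cost, plus two prologue stmts for
	     materializing the replicated low-bits and high-bits mask
	     constants used by the emulated lowering.  */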
6947 unsigned n
6948 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6949 switch (code)
6950 {
6951 case PLUS_EXPR:
6952 n *= 5;
6953 break;
6954 case MINUS_EXPR:
6955 n *= 6;
6956 break;
6957 case NEGATE_EXPR:
6958 n *= 4;
6959 break;
6960 default:
6961 /* Bit operations do not have extra cost and are accounted
6962 as vector stmt by vect_model_simple_cost. */
6963 n = 0;
6964 break;
6965 }
6966 if (n != 0)
6967 {
6968 /* We also need to materialize two large constants. */
6969 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6970 0, vect_prologue);
6971 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6972 0, vect_body);
6973 }
6974 }
6975 return true;
6976 }
6977
6978 /* Transform. */
6979
6980 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_NOTE, vect_location,
6982 "transform binary/unary operation.\n");
6983
6984 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6985 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6986
6987 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6988 vectors with unsigned elements, but the result is signed. So, we
6989 need to compute the MINUS_EXPR into a vectype temporary and
6990 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6991 tree vec_cvt_dest = NULL_TREE;
6992 if (orig_code == POINTER_DIFF_EXPR)
6993 {
6994 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6995 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6996 }
6997 /* Handle def. */
6998 else
6999 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
7000
7001 /* In case the vectorization factor (VF) is bigger than the number
7002 of elements that we can fit in a vectype (nunits), we have to generate
7003 more than one vector stmt - i.e. - we need to "unroll" the
7004 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7005 from one copy of the vector stmt to the next, in the field
7006 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7007 stages to find the correct vector defs to be used when vectorizing
7008 stmts that use the defs of the current stmt. The example below
7009 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
7010 we need to create 4 vectorized stmts):
7011
7012 before vectorization:
7013 RELATED_STMT VEC_STMT
7014 S1: x = memref - -
7015 S2: z = x + 1 - -
7016
7017 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
7018 there):
7019 RELATED_STMT VEC_STMT
7020 VS1_0: vx0 = memref0 VS1_1 -
7021 VS1_1: vx1 = memref1 VS1_2 -
7022 VS1_2: vx2 = memref2 VS1_3 -
7023 VS1_3: vx3 = memref3 - -
7024 S1: x = load - VS1_0
7025 S2: z = x + 1 - -
7026
7027 step2: vectorize stmt S2 (done here):
7028 To vectorize stmt S2 we first need to find the relevant vector
7029 def for the first operand 'x'. This is, as usual, obtained from
7030 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7031 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7032 relevant vector def 'vx0'. Having found 'vx0' we can generate
7033 the vector stmt VS2_0, and as usual, record it in the
7034 STMT_VINFO_VEC_STMT of stmt S2.
7035 When creating the second copy (VS2_1), we obtain the relevant vector
7036 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7037 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7038 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7039 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7040 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7041 chain of stmts and pointers:
7042 RELATED_STMT VEC_STMT
7043 VS1_0: vx0 = memref0 VS1_1 -
7044 VS1_1: vx1 = memref1 VS1_2 -
7045 VS1_2: vx2 = memref2 VS1_3 -
7046 VS1_3: vx3 = memref3 - -
7047 S1: x = load - VS1_0
7048 VS2_0: vz0 = vx0 + v1 VS2_1 -
7049 VS2_1: vz1 = vx1 + v1 VS2_2 -
7050 VS2_2: vz2 = vx2 + v1 VS2_3 -
7051 VS2_3: vz3 = vx3 + v1 - -
7052 S2: z = x + 1 - VS2_0 */
7053
7054 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7055 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7056 /* Arguments are ready. Create the new vector stmt. */
7057 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7058 {
7059 gimple *new_stmt = NULL;
7060 vop1 = ((op_type == binary_op || op_type == ternary_op)
7061 ? vec_oprnds1[i] : NULL_TREE);
7062 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7063 if (using_emulated_vectors_p
7064 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7065 {
7066 /* Lower the operation. This follows vector lowering. */
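	  /* Illustrative sketch with hypothetical values: for QImode
	     elements carried in a 32-bit word, low_bits is 0x7f7f7f7f and
	     high_bits is 0x80808080, and a PLUS_EXPR is emulated roughly as
	       ((a & 0x7f7f7f7f) + (b & 0x7f7f7f7f)) ^ ((a ^ b) & 0x80808080)
	     i.e. the element sums are formed without the top bits so no
	     carry crosses an element boundary, and the top bits are then
	     fixed up from the XOR of the operand sign bits.  */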
7067 unsigned int width = vector_element_bits (vectype);
7068 tree inner_type = TREE_TYPE (vectype);
7069 tree word_type
7070 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7071 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7072 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7073 tree high_bits
7074 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7075 tree wvop0 = make_ssa_name (word_type);
7076 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7077 build1 (VIEW_CONVERT_EXPR,
7078 word_type, vop0));
7079 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7080 tree result_low, signs;
7081 if (code == PLUS_EXPR || code == MINUS_EXPR)
7082 {
7083 tree wvop1 = make_ssa_name (word_type);
7084 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7085 build1 (VIEW_CONVERT_EXPR,
7086 word_type, vop1));
7087 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7088 signs = make_ssa_name (word_type);
7089 new_stmt = gimple_build_assign (signs,
7090 BIT_XOR_EXPR, wvop0, wvop1);
7091 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7092 tree b_low = make_ssa_name (word_type);
7093 new_stmt = gimple_build_assign (b_low,
7094 BIT_AND_EXPR, wvop1, low_bits);
7095 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7096 tree a_low = make_ssa_name (word_type);
7097 if (code == PLUS_EXPR)
7098 new_stmt = gimple_build_assign (a_low,
7099 BIT_AND_EXPR, wvop0, low_bits);
7100 else
7101 new_stmt = gimple_build_assign (a_low,
7102 BIT_IOR_EXPR, wvop0, high_bits);
7103 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7104 if (code == MINUS_EXPR)
7105 {
7106 new_stmt = gimple_build_assign (NULL_TREE,
7107 BIT_NOT_EXPR, signs);
7108 signs = make_ssa_name (word_type);
7109 gimple_assign_set_lhs (new_stmt, signs);
7110 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7111 }
7112 new_stmt = gimple_build_assign (NULL_TREE,
7113 BIT_AND_EXPR, signs, high_bits);
7114 signs = make_ssa_name (word_type);
7115 gimple_assign_set_lhs (new_stmt, signs);
7116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7117 result_low = make_ssa_name (word_type);
7118 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7119 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7120 }
7121 else
7122 {
7123 tree a_low = make_ssa_name (word_type);
7124 new_stmt = gimple_build_assign (a_low,
7125 BIT_AND_EXPR, wvop0, low_bits);
7126 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7127 signs = make_ssa_name (word_type);
7128 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7129 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7130 new_stmt = gimple_build_assign (NULL_TREE,
7131 BIT_AND_EXPR, signs, high_bits);
7132 signs = make_ssa_name (word_type);
7133 gimple_assign_set_lhs (new_stmt, signs);
7134 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7135 result_low = make_ssa_name (word_type);
7136 new_stmt = gimple_build_assign (result_low,
7137 MINUS_EXPR, high_bits, a_low);
7138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7139 }
7140 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7141 signs);
7142 result_low = make_ssa_name (word_type);
7143 gimple_assign_set_lhs (new_stmt, result_low);
7144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7145 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7146 build1 (VIEW_CONVERT_EXPR,
7147 vectype, result_low));
7148 new_temp = make_ssa_name (vectype);
7149 gimple_assign_set_lhs (new_stmt, new_temp);
7150 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7151 }
7152 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7153 {
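	  /* Emit a conditional internal function so that inactive lanes
	     neither trap nor disturb a reduction; e.g. (illustrative GIMPLE,
	     names made up):
	       vect_z_8 = .COND_ADD (loop_mask_5, vect_x_6, vect_y_7, else_9);
	     or, for length-controlled loops,
	       vect_z_8 = .COND_LEN_ADD (mask_5, vect_x_6, vect_y_7, else_9,
					 len_10, bias_11);  */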
7154 tree mask;
7155 if (masked_loop_p)
7156 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7157 vec_num * ncopies, vectype, i);
7158 else
7159 /* Dummy mask. */
7160 mask = build_minus_one_cst (truth_type_for (vectype));
7161 auto_vec<tree> vops (6);
7162 vops.quick_push (mask);
7163 vops.quick_push (vop0);
7164 if (vop1)
7165 vops.quick_push (vop1);
7166 if (vop2)
7167 vops.quick_push (vop2);
7168 if (reduc_idx >= 0)
7169 {
7170 /* Perform the operation on active elements only and take
7171 inactive elements from the reduction chain input. */
7172 gcc_assert (!vop2);
7173 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7174 }
7175 else
7176 {
7177 auto else_value = targetm.preferred_else_value
7178 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7179 vops.quick_push (else_value);
7180 }
7181 if (len_loop_p)
7182 {
7183 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7184 vec_num * ncopies, vectype, i, 1);
7185 signed char biasval
7186 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7187 tree bias = build_int_cst (intQI_type_node, biasval);
7188 vops.quick_push (len);
7189 vops.quick_push (bias);
7190 }
7191 gcall *call
7192 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7193 : cond_len_fn,
7194 vops);
7195 new_temp = make_ssa_name (vec_dest, call);
7196 gimple_call_set_lhs (call, new_temp);
7197 gimple_call_set_nothrow (call, true);
7198 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7199 new_stmt = call;
7200 }
7201 else
7202 {
7203 tree mask = NULL_TREE;
7204 /* When combining two masks, check whether either of them is
7205 elsewhere combined with a loop mask; if so, mark that the new
7206 combined mask doesn't need to be combined with a loop mask again. */
7207 if (masked_loop_p
7208 && code == BIT_AND_EXPR
7209 && VECTOR_BOOLEAN_TYPE_P (vectype))
7210 {
7211 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7212 ncopies}))
7213 {
7214 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7215 vec_num * ncopies, vectype, i);
7216
7217 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7218 vop0, gsi);
7219 }
7220
7221 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7222 ncopies }))
7223 {
7224 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7225 vec_num * ncopies, vectype, i);
7226
7227 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7228 vop1, gsi);
7229 }
7230 }
7231
7232 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7233 new_temp = make_ssa_name (vec_dest, new_stmt);
7234 gimple_assign_set_lhs (new_stmt, new_temp);
7235 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7236 if (using_emulated_vectors_p)
7237 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7238
7239 /* Enter the combined value into the vector cond hash so we don't
7240 AND it with a loop mask again. */
7241 if (mask)
7242 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7243 }
7244
7245 if (vec_cvt_dest)
7246 {
7247 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7248 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7249 new_temp);
7250 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7251 gimple_assign_set_lhs (new_stmt, new_temp);
7252 vect_finish_stmt_generation (vinfo, stmt_info,
7253 new_stmt, gsi);
7254 }
7255
7256 if (slp_node)
7257 slp_node->push_vec_def (new_stmt);
7258 else
7259 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7260 }
7261
7262 if (!slp_node)
7263 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7264
7265 vec_oprnds0.release ();
7266 vec_oprnds1.release ();
7267 vec_oprnds2.release ();
7268
7269 return true;
7270 }
7271
7272 /* A helper function to ensure data reference DR_INFO's base alignment. */
7273
7274 static void
7275 ensure_base_align (dr_vec_info *dr_info)
7276 {
7277 /* Alignment is only analyzed for the first element of a DR group,
7278 so use that element to determine the base alignment we need to enforce. */
7279 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7280 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7281
7282 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7283
7284 if (dr_info->base_misaligned)
7285 {
7286 tree base_decl = dr_info->base_decl;
7287
7288 // We should only be able to increase the alignment of a base object if
7289 // we know what its new alignment should be at compile time.
7290 unsigned HOST_WIDE_INT align_base_to =
7291 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7292
7293 if (decl_in_symtab_p (base_decl))
7294 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7295 else if (DECL_ALIGN (base_decl) < align_base_to)
7296 {
7297 SET_DECL_ALIGN (base_decl, align_base_to);
7298 DECL_USER_ALIGN (base_decl) = 1;
7299 }
7300 dr_info->base_misaligned = false;
7301 }
7302 }
7303
7304
7305 /* Function get_group_alias_ptr_type.
7306
7307 Return the alias type for the group starting at FIRST_STMT_INFO. */
7308
7309 static tree
7310 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7311 {
7312 struct data_reference *first_dr, *next_dr;
7313
7314 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7315 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7316 while (next_stmt_info)
7317 {
7318 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7319 if (get_alias_set (DR_REF (first_dr))
7320 != get_alias_set (DR_REF (next_dr)))
7321 {
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_NOTE, vect_location,
7324 "conflicting alias set types.\n");
7325 return ptr_type_node;
7326 }
7327 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7328 }
7329 return reference_alias_ptr_type (DR_REF (first_dr));
7330 }
7331
7332
7333 /* Function scan_operand_equal_p.
7334
7335 Helper function for check_scan_store. Compare two references
7336 with .GOMP_SIMD_LANE bases. */
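/* For example (illustrative, following the IL shown in check_scan_store
   below), the lhs of the store D.2043[_25] = _28 and the rhs of the load
   _26 = D.2043[_25] compare equal here: same base VAR_DECL, same bit size,
   and the same .GOMP_SIMD_LANE-derived offset and step once any widening
   cast and constant multiplication have been peeled off.  */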
7337
7338 static bool
7339 scan_operand_equal_p (tree ref1, tree ref2)
7340 {
7341 tree ref[2] = { ref1, ref2 };
7342 poly_int64 bitsize[2], bitpos[2];
7343 tree offset[2], base[2];
7344 for (int i = 0; i < 2; ++i)
7345 {
7346 machine_mode mode;
7347 int unsignedp, reversep, volatilep = 0;
7348 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7349 &offset[i], &mode, &unsignedp,
7350 &reversep, &volatilep);
7351 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7352 return false;
7353 if (TREE_CODE (base[i]) == MEM_REF
7354 && offset[i] == NULL_TREE
7355 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7356 {
7357 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7358 if (is_gimple_assign (def_stmt)
7359 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7360 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7361 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7362 {
7363 if (maybe_ne (mem_ref_offset (base[i]), 0))
7364 return false;
7365 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7366 offset[i] = gimple_assign_rhs2 (def_stmt);
7367 }
7368 }
7369 }
7370
7371 if (!operand_equal_p (base[0], base[1], 0))
7372 return false;
7373 if (maybe_ne (bitsize[0], bitsize[1]))
7374 return false;
7375 if (offset[0] != offset[1])
7376 {
7377 if (!offset[0] || !offset[1])
7378 return false;
7379 if (!operand_equal_p (offset[0], offset[1], 0))
7380 {
7381 tree step[2];
7382 for (int i = 0; i < 2; ++i)
7383 {
7384 step[i] = integer_one_node;
7385 if (TREE_CODE (offset[i]) == SSA_NAME)
7386 {
7387 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7388 if (is_gimple_assign (def_stmt)
7389 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7390 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7391 == INTEGER_CST))
7392 {
7393 step[i] = gimple_assign_rhs2 (def_stmt);
7394 offset[i] = gimple_assign_rhs1 (def_stmt);
7395 }
7396 }
7397 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7398 {
7399 step[i] = TREE_OPERAND (offset[i], 1);
7400 offset[i] = TREE_OPERAND (offset[i], 0);
7401 }
7402 tree rhs1 = NULL_TREE;
7403 if (TREE_CODE (offset[i]) == SSA_NAME)
7404 {
7405 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7406 if (gimple_assign_cast_p (def_stmt))
7407 rhs1 = gimple_assign_rhs1 (def_stmt);
7408 }
7409 else if (CONVERT_EXPR_P (offset[i]))
7410 rhs1 = TREE_OPERAND (offset[i], 0);
7411 if (rhs1
7412 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7413 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7414 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7415 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7416 offset[i] = rhs1;
7417 }
7418 if (!operand_equal_p (offset[0], offset[1], 0)
7419 || !operand_equal_p (step[0], step[1], 0))
7420 return false;
7421 }
7422 }
7423 return true;
7424 }
7425
7426
7427 enum scan_store_kind {
7428 /* Normal permutation. */
7429 scan_store_kind_perm,
7430
7431 /* Whole vector left shift permutation with zero init. */
7432 scan_store_kind_lshift_zero,
7433
7434 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7435 scan_store_kind_lshift_cond
7436 };
7437
7438 /* Function scan_store_can_perm_p.
7439
7440 Verify whether we can perform the needed permutations or whole vector shifts.
7441 Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
7442 If nonnull, USE_WHOLE_VECTOR records which scan_store_kind operation
7443 to perform at each step. */
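/* Illustrative sketch (assuming an 8-element vector, matching the example
   in the check_scan_store comment below): the scan needs units_log2 == 3
   shift-and-add steps plus a final broadcast, and the loop below checks
   one permutation per step:
     step 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     final:  { 7, 7, 7, 7, 7, 7, 7, 7 }
   falling back to whole-vector shifts (optionally followed by a
   VEC_COND_EXPR to reinsert the init value) when a permutation is not
   directly supported.  */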
7444
7445 static int
7446 scan_store_can_perm_p (tree vectype, tree init,
7447 vec<enum scan_store_kind> *use_whole_vector = NULL)
7448 {
7449 enum machine_mode vec_mode = TYPE_MODE (vectype);
7450 unsigned HOST_WIDE_INT nunits;
7451 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7452 return -1;
7453 int units_log2 = exact_log2 (nunits);
7454 if (units_log2 <= 0)
7455 return -1;
7456
7457 int i;
7458 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7459 for (i = 0; i <= units_log2; ++i)
7460 {
7461 unsigned HOST_WIDE_INT j, k;
7462 enum scan_store_kind kind = scan_store_kind_perm;
7463 vec_perm_builder sel (nunits, nunits, 1);
7464 sel.quick_grow (nunits);
7465 if (i == units_log2)
7466 {
7467 for (j = 0; j < nunits; ++j)
7468 sel[j] = nunits - 1;
7469 }
7470 else
7471 {
7472 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7473 sel[j] = j;
7474 for (k = 0; j < nunits; ++j, ++k)
7475 sel[j] = nunits + k;
7476 }
7477 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7478 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7479 {
7480 if (i == units_log2)
7481 return -1;
7482
7483 if (whole_vector_shift_kind == scan_store_kind_perm)
7484 {
7485 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7486 return -1;
7487 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7488 /* Whole vector shifts shift in zeros, so if INIT is an all-zero
7489 constant, there is no need to do anything further. */
7490 if ((TREE_CODE (init) != INTEGER_CST
7491 && TREE_CODE (init) != REAL_CST)
7492 || !initializer_zerop (init))
7493 {
7494 tree masktype = truth_type_for (vectype);
7495 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7496 return -1;
7497 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7498 }
7499 }
7500 kind = whole_vector_shift_kind;
7501 }
7502 if (use_whole_vector)
7503 {
7504 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7505 use_whole_vector->safe_grow_cleared (i, true);
7506 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7507 use_whole_vector->safe_push (kind);
7508 }
7509 }
7510
7511 return units_log2;
7512 }
7513
7514
7515 /* Function check_scan_store.
7516
7517 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7518
7519 static bool
7520 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7521 enum vect_def_type rhs_dt, bool slp, tree mask,
7522 vect_memory_access_type memory_access_type)
7523 {
7524 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7525 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7526 tree ref_type;
7527
7528 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7529 if (slp
7530 || mask
7531 || memory_access_type != VMAT_CONTIGUOUS
7532 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7533 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7534 || loop_vinfo == NULL
7535 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7536 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7537 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7538 || !integer_zerop (DR_INIT (dr_info->dr))
7539 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7540 || !alias_sets_conflict_p (get_alias_set (vectype),
7541 get_alias_set (TREE_TYPE (ref_type))))
7542 {
7543 if (dump_enabled_p ())
7544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7545 "unsupported OpenMP scan store.\n");
7546 return false;
7547 }
7548
7549 /* We need to pattern match code built by OpenMP lowering and simplified
7550 by following optimizations into something we can handle.
7551 #pragma omp simd reduction(inscan,+:r)
7552 for (...)
7553 {
7554 r += something ();
7555 #pragma omp scan inclusive (r)
7556 use (r);
7557 }
7558 shall have body with:
7559 // Initialization for input phase, store the reduction initializer:
7560 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7561 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7562 D.2042[_21] = 0;
7563 // Actual input phase:
7564 ...
7565 r.0_5 = D.2042[_20];
7566 _6 = _4 + r.0_5;
7567 D.2042[_20] = _6;
7568 // Initialization for scan phase:
7569 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7570 _26 = D.2043[_25];
7571 _27 = D.2042[_25];
7572 _28 = _26 + _27;
7573 D.2043[_25] = _28;
7574 D.2042[_25] = _28;
7575 // Actual scan phase:
7576 ...
7577 r.1_8 = D.2042[_20];
7578 ...
7579 The "omp simd array" variable D.2042 holds the privatized copy used
7580 inside the loop, and D.2043 is another one that holds copies of
7581 the current original list item. The separate GOMP_SIMD_LANE ifn
7582 kinds are there in order to allow optimizing the initializer store
7583 and combiner sequence, e.g. if it is originally some C++-ish
7584 user-defined reduction, while still letting the vectorizer pattern
7585 recognize it and turn it into the appropriate vectorized scan.
7586
7587 For exclusive scan, this is slightly different:
7588 #pragma omp simd reduction(inscan,+:r)
7589 for (...)
7590 {
7591 use (r);
7592 #pragma omp scan exclusive (r)
7593 r += something ();
7594 }
7595 shall have body with:
7596 // Initialization for input phase, store the reduction initializer:
7597 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7598 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7599 D.2042[_21] = 0;
7600 // Actual input phase:
7601 ...
7602 r.0_5 = D.2042[_20];
7603 _6 = _4 + r.0_5;
7604 D.2042[_20] = _6;
7605 // Initialization for scan phase:
7606 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7607 _26 = D.2043[_25];
7608 D.2044[_25] = _26;
7609 _27 = D.2042[_25];
7610 _28 = _26 + _27;
7611 D.2043[_25] = _28;
7612 // Actual scan phase:
7613 ...
7614 r.1_8 = D.2044[_20];
7615 ... */
7616
7617 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7618 {
7619 /* Match the D.2042[_21] = 0; store above. Just require that
7620 it is a constant or external definition store. */
7621 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7622 {
7623 fail_init:
7624 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7626 "unsupported OpenMP scan initializer store.\n");
7627 return false;
7628 }
7629
7630 if (! loop_vinfo->scan_map)
7631 loop_vinfo->scan_map = new hash_map<tree, tree>;
7632 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7633 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7634 if (cached)
7635 goto fail_init;
7636 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7637
7638 /* These stores can be vectorized normally. */
7639 return true;
7640 }
7641
7642 if (rhs_dt != vect_internal_def)
7643 {
7644 fail:
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7647 "unsupported OpenMP scan combiner pattern.\n");
7648 return false;
7649 }
7650
7651 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7652 tree rhs = gimple_assign_rhs1 (stmt);
7653 if (TREE_CODE (rhs) != SSA_NAME)
7654 goto fail;
7655
7656 gimple *other_store_stmt = NULL;
7657 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7658 bool inscan_var_store
7659 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7660
7661 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7662 {
7663 if (!inscan_var_store)
7664 {
7665 use_operand_p use_p;
7666 imm_use_iterator iter;
7667 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7668 {
7669 gimple *use_stmt = USE_STMT (use_p);
7670 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7671 continue;
7672 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7673 || !is_gimple_assign (use_stmt)
7674 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7675 || other_store_stmt
7676 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7677 goto fail;
7678 other_store_stmt = use_stmt;
7679 }
7680 if (other_store_stmt == NULL)
7681 goto fail;
7682 rhs = gimple_assign_lhs (other_store_stmt);
7683 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7684 goto fail;
7685 }
7686 }
7687 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7688 {
7689 use_operand_p use_p;
7690 imm_use_iterator iter;
7691 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7692 {
7693 gimple *use_stmt = USE_STMT (use_p);
7694 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7695 continue;
7696 if (other_store_stmt)
7697 goto fail;
7698 other_store_stmt = use_stmt;
7699 }
7700 }
7701 else
7702 goto fail;
7703
7704 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7705 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7706 || !is_gimple_assign (def_stmt)
7707 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7708 goto fail;
7709
7710 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7711 /* For pointer addition, we should use the normal plus for the vector
7712 operation. */
7713 switch (code)
7714 {
7715 case POINTER_PLUS_EXPR:
7716 code = PLUS_EXPR;
7717 break;
7718 case MULT_HIGHPART_EXPR:
7719 goto fail;
7720 default:
7721 break;
7722 }
7723 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7724 goto fail;
7725
7726 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7727 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7728 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7729 goto fail;
7730
7731 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7732 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7733 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7734 || !gimple_assign_load_p (load1_stmt)
7735 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7736 || !gimple_assign_load_p (load2_stmt))
7737 goto fail;
7738
7739 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7740 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7741 if (load1_stmt_info == NULL
7742 || load2_stmt_info == NULL
7743 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7744 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7745 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7746 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7747 goto fail;
7748
7749 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7750 {
7751 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7752 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7753 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7754 goto fail;
7755 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7756 tree lrhs;
7757 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7758 lrhs = rhs1;
7759 else
7760 lrhs = rhs2;
7761 use_operand_p use_p;
7762 imm_use_iterator iter;
7763 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7764 {
7765 gimple *use_stmt = USE_STMT (use_p);
7766 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7767 continue;
7768 if (other_store_stmt)
7769 goto fail;
7770 other_store_stmt = use_stmt;
7771 }
7772 }
7773
7774 if (other_store_stmt == NULL)
7775 goto fail;
7776 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7777 || !gimple_store_p (other_store_stmt))
7778 goto fail;
7779
7780 stmt_vec_info other_store_stmt_info
7781 = loop_vinfo->lookup_stmt (other_store_stmt);
7782 if (other_store_stmt_info == NULL
7783 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7784 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7785 goto fail;
7786
7787 gimple *stmt1 = stmt;
7788 gimple *stmt2 = other_store_stmt;
7789 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7790 std::swap (stmt1, stmt2);
7791 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7792 gimple_assign_rhs1 (load2_stmt)))
7793 {
7794 std::swap (rhs1, rhs2);
7795 std::swap (load1_stmt, load2_stmt);
7796 std::swap (load1_stmt_info, load2_stmt_info);
7797 }
7798 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7799 gimple_assign_rhs1 (load1_stmt)))
7800 goto fail;
7801
7802 tree var3 = NULL_TREE;
7803 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7804 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7805 gimple_assign_rhs1 (load2_stmt)))
7806 goto fail;
7807 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7808 {
7809 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7810 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7811 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7812 goto fail;
7813 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7814 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7815 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7816 || lookup_attribute ("omp simd inscan exclusive",
7817 DECL_ATTRIBUTES (var3)))
7818 goto fail;
7819 }
7820
7821 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7822 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7823 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7824 goto fail;
7825
7826 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7827 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7828 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7829 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7830 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7831 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7832 goto fail;
7833
7834 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7835 std::swap (var1, var2);
7836
7837 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7838 {
7839 if (!lookup_attribute ("omp simd inscan exclusive",
7840 DECL_ATTRIBUTES (var1)))
7841 goto fail;
7842 var1 = var3;
7843 }
7844
7845 if (loop_vinfo->scan_map == NULL)
7846 goto fail;
7847 tree *init = loop_vinfo->scan_map->get (var1);
7848 if (init == NULL)
7849 goto fail;
7850
7851 /* The IL is as expected; now check whether we can actually vectorize it.
7852 Inclusive scan:
7853 _26 = D.2043[_25];
7854 _27 = D.2042[_25];
7855 _28 = _26 + _27;
7856 D.2043[_25] = _28;
7857 D.2042[_25] = _28;
7858 should be vectorized as (where _40 is the vectorized rhs
7859 from the D.2042[_21] = 0; store):
7860 _30 = MEM <vector(8) int> [(int *)&D.2043];
7861 _31 = MEM <vector(8) int> [(int *)&D.2042];
7862 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7863 _33 = _31 + _32;
7864 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7865 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7866 _35 = _33 + _34;
7867 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7868 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7869 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7870 _37 = _35 + _36;
7871 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7872 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7873 _38 = _30 + _37;
7874 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7875 MEM <vector(8) int> [(int *)&D.2043] = _39;
7876 MEM <vector(8) int> [(int *)&D.2042] = _38;
7877 Exclusive scan:
7878 _26 = D.2043[_25];
7879 D.2044[_25] = _26;
7880 _27 = D.2042[_25];
7881 _28 = _26 + _27;
7882 D.2043[_25] = _28;
7883 should be vectorized as (where _40 is the vectorized rhs
7884 from the D.2042[_21] = 0; store):
7885 _30 = MEM <vector(8) int> [(int *)&D.2043];
7886 _31 = MEM <vector(8) int> [(int *)&D.2042];
7887 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7888 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7889 _34 = _32 + _33;
7890 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7891 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7892 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7893 _36 = _34 + _35;
7894 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7895 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7896 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7897 _38 = _36 + _37;
7898 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7899 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7900 _39 = _30 + _38;
7901 _50 = _31 + _39;
7902 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7903 MEM <vector(8) int> [(int *)&D.2044] = _39;
7904 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7905 enum machine_mode vec_mode = TYPE_MODE (vectype);
7906 optab optab = optab_for_tree_code (code, vectype, optab_default);
7907 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7908 goto fail;
7909
7910 int units_log2 = scan_store_can_perm_p (vectype, *init);
7911 if (units_log2 == -1)
7912 goto fail;
7913
7914 return true;
7915 }
7916
7917
7918 /* Function vectorizable_scan_store.
7919
7920 Helper of vectorizable_store; arguments are like those of vectorizable_store.
7921 Handle only the transformation; the checking is done in check_scan_store. */
7922
7923 static bool
7924 vectorizable_scan_store (vec_info *vinfo,
7925 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7926 gimple **vec_stmt, int ncopies)
7927 {
7928 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7929 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7930 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7932
7933 if (dump_enabled_p ())
7934 dump_printf_loc (MSG_NOTE, vect_location,
7935 "transform scan store. ncopies = %d\n", ncopies);
7936
7937 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7938 tree rhs = gimple_assign_rhs1 (stmt);
7939 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7940
7941 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7942 bool inscan_var_store
7943 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7944
7945 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7946 {
7947 use_operand_p use_p;
7948 imm_use_iterator iter;
7949 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7950 {
7951 gimple *use_stmt = USE_STMT (use_p);
7952 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7953 continue;
7954 rhs = gimple_assign_lhs (use_stmt);
7955 break;
7956 }
7957 }
7958
7959 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7960 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7961 if (code == POINTER_PLUS_EXPR)
7962 code = PLUS_EXPR;
7963 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7964 && commutative_tree_code (code));
7965 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7966 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7967 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7968 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7969 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7970 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7971 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7972 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7973 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7974 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7975 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7976
7977 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7978 {
7979 std::swap (rhs1, rhs2);
7980 std::swap (var1, var2);
7981 std::swap (load1_dr_info, load2_dr_info);
7982 }
7983
7984 tree *init = loop_vinfo->scan_map->get (var1);
7985 gcc_assert (init);
7986
7987 unsigned HOST_WIDE_INT nunits;
7988 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7989 gcc_unreachable ();
7990 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7991 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7992 gcc_assert (units_log2 > 0);
7993 auto_vec<tree, 16> perms;
7994 perms.quick_grow (units_log2 + 1);
7995 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7996 for (int i = 0; i <= units_log2; ++i)
7997 {
7998 unsigned HOST_WIDE_INT j, k;
7999 vec_perm_builder sel (nunits, nunits, 1);
8000 sel.quick_grow (nunits);
8001 if (i == units_log2)
8002 for (j = 0; j < nunits; ++j)
8003 sel[j] = nunits - 1;
8004 else
8005 {
8006 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
8007 sel[j] = j;
8008 for (k = 0; j < nunits; ++j, ++k)
8009 sel[j] = nunits + k;
8010 }
8011 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
8012 if (!use_whole_vector.is_empty ()
8013 && use_whole_vector[i] != scan_store_kind_perm)
8014 {
8015 if (zero_vec == NULL_TREE)
8016 zero_vec = build_zero_cst (vectype);
8017 if (masktype == NULL_TREE
8018 && use_whole_vector[i] == scan_store_kind_lshift_cond)
8019 masktype = truth_type_for (vectype);
8020 perms[i] = vect_gen_perm_mask_any (vectype, indices);
8021 }
8022 else
8023 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8024 }
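  /* Note: for an 8-element vector the perms[] just built are the same
     selectors shown in the check_scan_store comment above, ending with
     the { 7, 7, ..., 7 } broadcast of the last element.  */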
8025
8026 tree vec_oprnd1 = NULL_TREE;
8027 tree vec_oprnd2 = NULL_TREE;
8028 tree vec_oprnd3 = NULL_TREE;
8029 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
8030 tree dataref_offset = build_int_cst (ref_type, 0);
8031 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8032 vectype, VMAT_CONTIGUOUS);
8033 tree ldataref_ptr = NULL_TREE;
8034 tree orig = NULL_TREE;
8035 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8036 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8037 auto_vec<tree> vec_oprnds1;
8038 auto_vec<tree> vec_oprnds2;
8039 auto_vec<tree> vec_oprnds3;
8040 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8041 *init, &vec_oprnds1,
8042 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8043 rhs2, &vec_oprnds3);
8044 for (int j = 0; j < ncopies; j++)
8045 {
8046 vec_oprnd1 = vec_oprnds1[j];
8047 if (ldataref_ptr == NULL)
8048 vec_oprnd2 = vec_oprnds2[j];
8049 vec_oprnd3 = vec_oprnds3[j];
8050 if (j == 0)
8051 orig = vec_oprnd3;
8052 else if (!inscan_var_store)
8053 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8054
8055 if (ldataref_ptr)
8056 {
8057 vec_oprnd2 = make_ssa_name (vectype);
8058 tree data_ref = fold_build2 (MEM_REF, vectype,
8059 unshare_expr (ldataref_ptr),
8060 dataref_offset);
8061 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8062 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8063 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8064 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8065 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8066 }
8067
8068 tree v = vec_oprnd2;
8069 for (int i = 0; i < units_log2; ++i)
8070 {
8071 tree new_temp = make_ssa_name (vectype);
8072 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8073 (zero_vec
8074 && (use_whole_vector[i]
8075 != scan_store_kind_perm))
8076 ? zero_vec : vec_oprnd1, v,
8077 perms[i]);
8078 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8079 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8080 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8081
8082 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8083 {
8084 /* The whole-vector shift shifted in zero bits, but if *init
8085 is not initializer_zerop, we need to replace those elements
8086 with elements from vec_oprnd1. */
8087 tree_vector_builder vb (masktype, nunits, 1);
8088 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8089 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8090 ? boolean_false_node : boolean_true_node);
8091
8092 tree new_temp2 = make_ssa_name (vectype);
8093 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8094 new_temp, vec_oprnd1);
8095 vect_finish_stmt_generation (vinfo, stmt_info,
8096 g, gsi);
8097 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8098 new_temp = new_temp2;
8099 }
8100
8101 /* For exclusive scan, perform the perms[i] permutation once
8102 more. */
8103 if (i == 0
8104 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8105 && v == vec_oprnd2)
8106 {
8107 v = new_temp;
8108 --i;
8109 continue;
8110 }
8111
8112 tree new_temp2 = make_ssa_name (vectype);
8113 g = gimple_build_assign (new_temp2, code, v, new_temp);
8114 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8115 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8116
8117 v = new_temp2;
8118 }
8119
8120 tree new_temp = make_ssa_name (vectype);
8121 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8122 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8123 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8124
8125 tree last_perm_arg = new_temp;
8126 /* For exclusive scan, new_temp computed above is the exclusive scan
8127 prefix sum. Turn it into an inclusive prefix sum before broadcasting
8128 its last element into orig. */
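	  /* Illustrative example (ignoring the carried-in orig/init
	     contribution for simplicity): if vec_oprnd2 is { a0, a1, a2, a3 }
	     the exclusive prefix sum in new_temp is { 0, a0, a0+a1, a0+a1+a2 };
	     adding vec_oprnd2 back yields the inclusive sums
	     { a0, a0+a1, a0+a1+a2, a0+a1+a2+a3 }, whose last element is what
	     must be carried over into orig.  */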
8129 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8130 {
8131 last_perm_arg = make_ssa_name (vectype);
8132 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8133 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8134 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8135 }
8136
8137 orig = make_ssa_name (vectype);
8138 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8139 last_perm_arg, perms[units_log2]);
8140 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8141 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8142
8143 if (!inscan_var_store)
8144 {
8145 tree data_ref = fold_build2 (MEM_REF, vectype,
8146 unshare_expr (dataref_ptr),
8147 dataref_offset);
8148 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8149 g = gimple_build_assign (data_ref, new_temp);
8150 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8151 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8152 }
8153 }
8154
8155 if (inscan_var_store)
8156 for (int j = 0; j < ncopies; j++)
8157 {
8158 if (j != 0)
8159 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8160
8161 tree data_ref = fold_build2 (MEM_REF, vectype,
8162 unshare_expr (dataref_ptr),
8163 dataref_offset);
8164 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8165 gimple *g = gimple_build_assign (data_ref, orig);
8166 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8167 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8168 }
8169 return true;
8170 }
8171
8172
8173 /* Function vectorizable_store.
8174
8175 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8176 that can be vectorized.
8177 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8178 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8179 Return true if STMT_INFO is vectorizable in this way. */
8180
8181 static bool
8182 vectorizable_store (vec_info *vinfo,
8183 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8184 gimple **vec_stmt, slp_tree slp_node,
8185 stmt_vector_for_cost *cost_vec)
8186 {
8187 tree data_ref;
8188 tree op;
8189 tree vec_oprnd = NULL_TREE;
8190 tree elem_type;
8191 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8192 class loop *loop = NULL;
8193 machine_mode vec_mode;
8194 tree dummy;
8195 enum vect_def_type rhs_dt = vect_unknown_def_type;
8196 enum vect_def_type mask_dt = vect_unknown_def_type;
8197 tree dataref_ptr = NULL_TREE;
8198 tree dataref_offset = NULL_TREE;
8199 gimple *ptr_incr = NULL;
8200 int ncopies;
8201 int j;
8202 stmt_vec_info first_stmt_info;
8203 bool grouped_store;
8204 unsigned int group_size, i;
8205 bool slp = (slp_node != NULL);
8206 unsigned int vec_num;
8207 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8208 tree aggr_type;
8209 gather_scatter_info gs_info;
8210 poly_uint64 vf;
8211 vec_load_store_type vls_type;
8212 tree ref_type;
8213
8214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8215 return false;
8216
8217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8218 && ! vec_stmt)
8219 return false;
8220
8221 /* Is vectorizable store? */
8222
8223 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8224 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8225 {
8226 tree scalar_dest = gimple_assign_lhs (assign);
8227 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8228 && is_pattern_stmt_p (stmt_info))
8229 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8230 if (TREE_CODE (scalar_dest) != ARRAY_REF
8231 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8232 && TREE_CODE (scalar_dest) != INDIRECT_REF
8233 && TREE_CODE (scalar_dest) != COMPONENT_REF
8234 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8235 && TREE_CODE (scalar_dest) != REALPART_EXPR
8236 && TREE_CODE (scalar_dest) != MEM_REF)
8237 return false;
8238 }
8239 else
8240 {
8241 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8242 if (!call || !gimple_call_internal_p (call))
8243 return false;
8244
8245 internal_fn ifn = gimple_call_internal_fn (call);
8246 if (!internal_store_fn_p (ifn))
8247 return false;
8248
8249 int mask_index = internal_fn_mask_index (ifn);
8250 if (mask_index >= 0 && slp_node)
8251 mask_index = vect_slp_child_index_for_operand (call, mask_index);
8252 if (mask_index >= 0
8253 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8254 &mask, NULL, &mask_dt, &mask_vectype))
8255 return false;
8256 }
8257
8258 op = vect_get_store_rhs (stmt_info);
8259
8260 /* Cannot have hybrid store SLP -- that would mean storing to the
8261 same location twice. */
8262 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8263
8264 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8265 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8266
8267 if (loop_vinfo)
8268 {
8269 loop = LOOP_VINFO_LOOP (loop_vinfo);
8270 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8271 }
8272 else
8273 vf = 1;
8274
8275 /* Multiple types in SLP are handled by creating the appropriate number of
8276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8277 case of SLP. */
8278 if (slp)
8279 ncopies = 1;
8280 else
8281 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8282
8283 gcc_assert (ncopies >= 1);
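  /* For example, with a vectorization factor of 16 and a 4-element
     vectype, ncopies == 4 vector stores are generated for this scalar
     store in the non-SLP case; with SLP the copies are instead counted
     per SLP node via SLP_TREE_NUMBER_OF_VEC_STMTS.  */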
8284
8285 /* FORNOW. This restriction should be relaxed. */
8286 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8287 {
8288 if (dump_enabled_p ())
8289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8290 "multiple types in nested loop.\n");
8291 return false;
8292 }
8293
8294 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8295 op, &rhs_dt, &rhs_vectype, &vls_type))
8296 return false;
8297
8298 elem_type = TREE_TYPE (vectype);
8299 vec_mode = TYPE_MODE (vectype);
8300
8301 if (!STMT_VINFO_DATA_REF (stmt_info))
8302 return false;
8303
8304 vect_memory_access_type memory_access_type;
8305 enum dr_alignment_support alignment_support_scheme;
8306 int misalignment;
8307 poly_int64 poffset;
8308 internal_fn lanes_ifn;
8309 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8310 ncopies, &memory_access_type, &poffset,
8311 &alignment_support_scheme, &misalignment, &gs_info,
8312 &lanes_ifn))
8313 return false;
8314
8315 if (mask)
8316 {
8317 if (memory_access_type == VMAT_CONTIGUOUS)
8318 {
8319 if (!VECTOR_MODE_P (vec_mode)
8320 || !can_vec_mask_load_store_p (vec_mode,
8321 TYPE_MODE (mask_vectype), false))
8322 return false;
8323 }
8324 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8325 && (memory_access_type != VMAT_GATHER_SCATTER
8326 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8327 {
8328 if (dump_enabled_p ())
8329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8330 "unsupported access type for masked store.\n");
8331 return false;
8332 }
8333 else if (memory_access_type == VMAT_GATHER_SCATTER
8334 && gs_info.ifn == IFN_LAST
8335 && !gs_info.decl)
8336 {
8337 if (dump_enabled_p ())
8338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8339 "unsupported masked emulated scatter.\n");
8340 return false;
8341 }
8342 }
8343 else
8344 {
8345 /* FORNOW. In some cases we can vectorize even if the data type is not
8346 supported (e.g. array initialization with 0). */
8347 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8348 return false;
8349 }
8350
8351 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8352 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8353 && memory_access_type != VMAT_GATHER_SCATTER
8354 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8355 if (grouped_store)
8356 {
8357 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8358 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8359 group_size = DR_GROUP_SIZE (first_stmt_info);
8360 }
8361 else
8362 {
8363 first_stmt_info = stmt_info;
8364 first_dr_info = dr_info;
8365 group_size = vec_num = 1;
8366 }
8367
8368 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8369 {
8370 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8371 memory_access_type))
8372 return false;
8373 }
8374
8375 if (!vec_stmt) /* transformation not required. */
8376 {
8377 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8378
8379 if (loop_vinfo
8380 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8381 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8382 vls_type, group_size,
8383 memory_access_type, &gs_info,
8384 mask);
8385
8386 if (slp_node
8387 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8388 vectype))
8389 {
8390 if (dump_enabled_p ())
8391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8392 "incompatible vector types for invariants\n");
8393 return false;
8394 }
8395
8396 if (dump_enabled_p ()
8397 && memory_access_type != VMAT_ELEMENTWISE
8398 && memory_access_type != VMAT_GATHER_SCATTER
8399 && alignment_support_scheme != dr_aligned)
8400 dump_printf_loc (MSG_NOTE, vect_location,
8401 "Vectorizing an unaligned access.\n");
8402
8403 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8404 vect_model_store_cost (vinfo, stmt_info, ncopies,
8405 memory_access_type, &gs_info,
8406 alignment_support_scheme,
8407 misalignment, vls_type, slp_node, cost_vec);
8408 return true;
8409 }
8410 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8411
8412 /* Transform. */
8413
8414 ensure_base_align (dr_info);
8415
8416 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8417 {
8418 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt,
8419 &gs_info, mask);
8420 return true;
8421 }
8422 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8423 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8424
8425 if (grouped_store)
8426 {
8427 /* FORNOW */
8428 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8429
8430 if (slp)
8431 {
8432 grouped_store = false;
8433 /* VEC_NUM is the number of vect stmts to be created for this
8434 group. */
8435 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8436 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8437 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8438 == first_stmt_info);
8439 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8440 op = vect_get_store_rhs (first_stmt_info);
8441 }
8442 else
8443 /* VEC_NUM is the number of vect stmts to be created for this
8444 group. */
8445 vec_num = group_size;
8446
8447 ref_type = get_group_alias_ptr_type (first_stmt_info);
8448 }
8449 else
8450 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8451
8452 if (dump_enabled_p ())
8453 dump_printf_loc (MSG_NOTE, vect_location,
8454 "transform store. ncopies = %d\n", ncopies);
8455
8456 if (memory_access_type == VMAT_ELEMENTWISE
8457 || memory_access_type == VMAT_STRIDED_SLP)
8458 {
8459 gimple_stmt_iterator incr_gsi;
8460 bool insert_after;
8461 gimple *incr;
8462 tree offvar;
8463 tree ivstep;
8464 tree running_off;
8465 tree stride_base, stride_step, alias_off;
8466 tree vec_oprnd;
8467 tree dr_offset;
8468 unsigned int g;
8469 /* Checked by get_load_store_type. */
8470 unsigned int const_nunits = nunits.to_constant ();
8471
8472 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8473 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8474
8475 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8476 stride_base
8477 = fold_build_pointer_plus
8478 (DR_BASE_ADDRESS (first_dr_info->dr),
8479 size_binop (PLUS_EXPR,
8480 convert_to_ptrofftype (dr_offset),
8481 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8482 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8483
8484 /* For a store with loop-invariant (but other than power-of-2)
8485 stride (i.e. not a grouped access) like so:
8486
8487 for (i = 0; i < n; i += stride)
8488 array[i] = ...;
8489
8490 we generate a new induction variable and new stores from
8491 the components of the (vectorized) rhs:
8492
8493 for (j = 0; ; j += VF*stride)
8494 vectemp = ...;
8495 tmp1 = vectemp[0];
8496 array[j] = tmp1;
8497 tmp2 = vectemp[1];
8498 array[j + stride] = tmp2;
8499 ...
8500 */
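/* Illustrative example (not from the original sources): with a V4SI
   VECTYPE (CONST_NUNITS == 4) and an SLP group size of 2, the code below
   picks NSTORES == 2 and LNEL == 2 and emits two V2SI stores per vector;
   if the target cannot vec_extract a V2SI from a V4SI, it instead extracts
   two DImode pieces from the value viewed as V2DI. */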
8501
8502 unsigned nstores = const_nunits;
8503 unsigned lnel = 1;
8504 tree ltype = elem_type;
8505 tree lvectype = vectype;
8506 if (slp)
8507 {
8508 if (group_size < const_nunits
8509 && const_nunits % group_size == 0)
8510 {
8511 nstores = const_nunits / group_size;
8512 lnel = group_size;
8513 ltype = build_vector_type (elem_type, group_size);
8514 lvectype = vectype;
8515
8516 /* First check if vec_extract optab doesn't support extraction
8517 of vector elts directly. */
8518 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8519 machine_mode vmode;
8520 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8521 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8522 group_size).exists (&vmode)
8523 || (convert_optab_handler (vec_extract_optab,
8524 TYPE_MODE (vectype), vmode)
8525 == CODE_FOR_nothing))
8526 {
8527 /* Try to avoid emitting an extract of vector elements
8528 by performing the extracts using an integer type of the
8529 same size, extracting from a vector of those and then
8530 re-interpreting it as the original vector type if
8531 supported. */
8532 unsigned lsize
8533 = group_size * GET_MODE_BITSIZE (elmode);
8534 unsigned int lnunits = const_nunits / group_size;
8535 /* If we can't construct such a vector fall back to
8536 element extracts from the original vector type and
8537 element size stores. */
8538 if (int_mode_for_size (lsize, 0).exists (&elmode)
8539 && VECTOR_MODE_P (TYPE_MODE (vectype))
8540 && related_vector_mode (TYPE_MODE (vectype), elmode,
8541 lnunits).exists (&vmode)
8542 && (convert_optab_handler (vec_extract_optab,
8543 vmode, elmode)
8544 != CODE_FOR_nothing))
8545 {
8546 nstores = lnunits;
8547 lnel = group_size;
8548 ltype = build_nonstandard_integer_type (lsize, 1);
8549 lvectype = build_vector_type (ltype, nstores);
8550 }
8551 /* Else fall back to vector extraction anyway.
8552 Fewer stores are more important than avoiding spilling
8553 of the vector we extract from. Compared to the
8554 construction case in vectorizable_load no store-forwarding
8555 issue exists here for reasonable archs. */
8556 }
8557 }
8558 else if (group_size >= const_nunits
8559 && group_size % const_nunits == 0)
8560 {
8561 nstores = 1;
8562 lnel = const_nunits;
8563 ltype = vectype;
8564 lvectype = vectype;
8565 }
8566 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8567 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8568 }
8569
8570 ivstep = stride_step;
8571 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8572 build_int_cst (TREE_TYPE (ivstep), vf));
8573
8574 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8575
8576 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8577 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8578 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8579 loop, &incr_gsi, insert_after,
8580 &offvar, NULL);
8581 incr = gsi_stmt (incr_gsi);
8582
8583 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8584
8585 alias_off = build_int_cst (ref_type, 0);
8586 stmt_vec_info next_stmt_info = first_stmt_info;
8587 auto_vec<tree> vec_oprnds (ncopies);
8588 for (g = 0; g < group_size; g++)
8589 {
8590 running_off = offvar;
8591 if (g)
8592 {
8593 tree size = TYPE_SIZE_UNIT (ltype);
8594 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8595 size);
8596 tree newoff = copy_ssa_name (running_off, NULL);
8597 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8598 running_off, pos);
8599 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8600 running_off = newoff;
8601 }
8602 if (!slp)
8603 op = vect_get_store_rhs (next_stmt_info);
8604 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8605 op, &vec_oprnds);
8606 unsigned int group_el = 0;
8607 unsigned HOST_WIDE_INT
8608 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8609 for (j = 0; j < ncopies; j++)
8610 {
8611 vec_oprnd = vec_oprnds[j];
8612 /* Pun the vector to extract from if necessary. */
8613 if (lvectype != vectype)
8614 {
8615 tree tem = make_ssa_name (lvectype);
8616 gimple *pun
8617 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8618 lvectype, vec_oprnd));
8619 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8620 vec_oprnd = tem;
8621 }
8622 for (i = 0; i < nstores; i++)
8623 {
8624 tree newref, newoff;
8625 gimple *incr, *assign;
8626 tree size = TYPE_SIZE (ltype);
8627 /* Extract the i'th component. */
8628 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8629 bitsize_int (i), size);
8630 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8631 size, pos);
8632
8633 elem = force_gimple_operand_gsi (gsi, elem, true,
8634 NULL_TREE, true,
8635 GSI_SAME_STMT);
8636
8637 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8638 group_el * elsz);
8639 newref = build2 (MEM_REF, ltype,
8640 running_off, this_off);
8641 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8642
8643 /* And store it to *running_off. */
8644 assign = gimple_build_assign (newref, elem);
8645 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8646
8647 group_el += lnel;
8648 if (! slp
8649 || group_el == group_size)
8650 {
8651 newoff = copy_ssa_name (running_off, NULL);
8652 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8653 running_off, stride_step);
8654 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8655
8656 running_off = newoff;
8657 group_el = 0;
8658 }
8659 if (g == group_size - 1
8660 && !slp)
8661 {
8662 if (j == 0 && i == 0)
8663 *vec_stmt = assign;
8664 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8665 }
8666 }
8667 }
8668 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8669 vec_oprnds.truncate (0);
8670 if (slp)
8671 break;
8672 }
8673
8674 return true;
8675 }
8676
8677 gcc_assert (alignment_support_scheme);
8678 vec_loop_masks *loop_masks
8679 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8680 ? &LOOP_VINFO_MASKS (loop_vinfo)
8681 : NULL);
8682 vec_loop_lens *loop_lens
8683 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8684 ? &LOOP_VINFO_LENS (loop_vinfo)
8685 : NULL);
8686
8687 /* Shouldn't go with length-based approach if fully masked. */
8688 gcc_assert (!loop_lens || !loop_masks);
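/* For clarity (added comment): LOOP_MASKS is non-NULL when the loop is
   fully masked (predication, e.g. SVE-style), LOOP_LENS when partial
   vectors are controlled by an explicit length (e.g. load/store-with-length
   targets); as asserted above, the two schemes are mutually exclusive. */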
8689
8690 /* Targets with store-lane instructions must not require explicit
8691 realignment. vect_supportable_dr_alignment always returns either
8692 dr_aligned or dr_unaligned_supported for masked operations. */
8693 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8694 && !mask
8695 && !loop_masks)
8696 || alignment_support_scheme == dr_aligned
8697 || alignment_support_scheme == dr_unaligned_supported);
8698
8699 tree offset = NULL_TREE;
8700 if (!known_eq (poffset, 0))
8701 offset = size_int (poffset);
8702
8703 tree bump;
8704 tree vec_offset = NULL_TREE;
8705 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8706 {
8707 aggr_type = NULL_TREE;
8708 bump = NULL_TREE;
8709 }
8710 else if (memory_access_type == VMAT_GATHER_SCATTER)
8711 {
8712 aggr_type = elem_type;
8713 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8714 &bump, &vec_offset, loop_lens);
8715 }
8716 else
8717 {
8718 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8719 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8720 else
8721 aggr_type = vectype;
8722 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8723 memory_access_type, loop_lens);
8724 }
8725
8726 if (mask)
8727 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8728
8729 /* In case the vectorization factor (VF) is bigger than the number
8730 of elements that we can fit in a vectype (nunits), we have to generate
8731 more than one vector stmt, i.e. we need to "unroll" the
8732 vector stmt by a factor VF/nunits. */
8733
8734 /* In case of interleaving (non-unit grouped access):
8735
8736 S1: &base + 2 = x2
8737 S2: &base = x0
8738 S3: &base + 1 = x1
8739 S4: &base + 3 = x3
8740
8741 We create vectorized stores starting from the base address (the access of
8742 the first stmt in the chain, S2 in the above example) when the last store
8743 stmt of the chain (S4) is reached:
8744
8745 VS1: &base = vx2
8746 VS2: &base + vec_size*1 = vx0
8747 VS3: &base + vec_size*2 = vx1
8748 VS4: &base + vec_size*3 = vx3
8749
8750 Then permutation statements are generated:
8751
8752 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8753 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8754 ...
8755
8756 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8757 (the order of the data-refs in the output of vect_permute_store_chain
8758 corresponds to the order of scalar stmts in the interleaving chain - see
8759 the documentation of vect_permute_store_chain()).
8760
8761 In case of both multiple types and interleaving, above vector stores and
8762 permutation stmts are created for every copy. The result vector stmts are
8763 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8764 STMT_VINFO_RELATED_STMT for the next copies.
8765 */
8766
8767 auto_vec<tree> dr_chain (group_size);
8768 auto_vec<tree> vec_masks;
8769 tree vec_mask = NULL;
8770 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8771 for (i = 0; i < group_size; i++)
8772 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8773
8774 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8775 {
8776 gcc_assert (!slp && grouped_store);
8777 for (j = 0; j < ncopies; j++)
8778 {
8779 gimple *new_stmt;
8780 if (j == 0)
8781 {
8782 /* For interleaved stores we collect vectorized defs for all
8783 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8784 as an input to vect_permute_store_chain(). */
8785 stmt_vec_info next_stmt_info = first_stmt_info;
8786 for (i = 0; i < group_size; i++)
8787 {
8788 /* Since gaps are not supported for interleaved stores,
8789 DR_GROUP_SIZE is the exact number of stmts in the
8790 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8791 op = vect_get_store_rhs (next_stmt_info);
8792 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
8793 op, gvec_oprnds[i]);
8794 vec_oprnd = (*gvec_oprnds[i])[0];
8795 dr_chain.quick_push (vec_oprnd);
8796 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8797 }
8798 if (mask)
8799 {
8800 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8801 mask, &vec_masks,
8802 mask_vectype);
8803 vec_mask = vec_masks[0];
8804 }
8805
8806 /* We should have caught mismatched types earlier. */
8807 gcc_assert (
8808 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8809 dataref_ptr
8810 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8811 NULL, offset, &dummy, gsi,
8812 &ptr_incr, false, bump);
8813 }
8814 else
8815 {
8816 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8817 /* DR_CHAIN is then used as an input to
8818 vect_permute_store_chain(). */
8819 for (i = 0; i < group_size; i++)
8820 {
8821 vec_oprnd = (*gvec_oprnds[i])[j];
8822 dr_chain[i] = vec_oprnd;
8823 }
8824 if (mask)
8825 vec_mask = vec_masks[j];
8826 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8827 stmt_info, bump);
8828 }
8829
8830 /* Get an array into which we can store the individual vectors. */
8831 tree vec_array = create_vector_array (vectype, vec_num);
8832
8833 /* Invalidate the current contents of VEC_ARRAY. This should
8834 become an RTL clobber too, which prevents the vector registers
8835 from being upward-exposed. */
8836 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8837
8838 /* Store the individual vectors into the array. */
8839 for (i = 0; i < vec_num; i++)
8840 {
8841 vec_oprnd = dr_chain[i];
8842 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8843 i);
8844 }
8845
8846 tree final_mask = NULL;
8847 tree final_len = NULL;
8848 tree bias = NULL;
8849 if (loop_masks)
8850 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8851 ncopies, vectype, j);
8852 if (vec_mask)
8853 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8854 vec_mask, gsi);
8855
8856 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8857 {
8858 if (loop_lens)
8859 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8860 ncopies, vectype, j, 1);
8861 else
8862 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
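/* Added note: the partial load/store bias is a target property; most
   targets report 0, while a target whose length operand encodes the
   last active element rather than the element count (e.g. s390)
   reports -1, which is then passed as the BIAS argument below. */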
8863 signed char biasval
8864 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8865 bias = build_int_cst (intQI_type_node, biasval);
8866 if (!final_mask)
8867 {
8868 mask_vectype = truth_type_for (vectype);
8869 final_mask = build_minus_one_cst (mask_vectype);
8870 }
8871 }
8872
8873 gcall *call;
8874 if (final_len && final_mask)
8875 {
8876 /* Emit:
8877 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8878 LEN, BIAS, VEC_ARRAY). */
8879 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8880 tree alias_ptr = build_int_cst (ref_type, align);
8881 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8882 dataref_ptr, alias_ptr,
8883 final_mask, final_len, bias,
8884 vec_array);
8885 }
8886 else if (final_mask)
8887 {
8888 /* Emit:
8889 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8890 VEC_ARRAY). */
8891 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8892 tree alias_ptr = build_int_cst (ref_type, align);
8893 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8894 dataref_ptr, alias_ptr,
8895 final_mask, vec_array);
8896 }
8897 else
8898 {
8899 /* Emit:
8900 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8901 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8902 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8903 gimple_call_set_lhs (call, data_ref);
8904 }
8905 gimple_call_set_nothrow (call, true);
8906 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8907 new_stmt = call;
8908
8909 /* Record that VEC_ARRAY is now dead. */
8910 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8911 if (j == 0)
8912 *vec_stmt = new_stmt;
8913 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8914 }
8915
8916 return true;
8917 }
8918
8919 if (memory_access_type == VMAT_GATHER_SCATTER)
8920 {
8921 gcc_assert (!slp && !grouped_store);
8922 auto_vec<tree> vec_offsets;
8923 for (j = 0; j < ncopies; j++)
8924 {
8925 gimple *new_stmt;
8926 if (j == 0)
8927 {
8928 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8929 DR_CHAIN is of size 1. */
8930 gcc_assert (group_size == 1);
8931 op = vect_get_store_rhs (first_stmt_info);
8932 vect_get_vec_defs_for_operand (vinfo, first_stmt_info, ncopies,
8933 op, gvec_oprnds[0]);
8934 vec_oprnd = (*gvec_oprnds[0])[0];
8935 dr_chain.quick_push (vec_oprnd);
8936 if (mask)
8937 {
8938 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8939 mask, &vec_masks,
8940 mask_vectype);
8941 vec_mask = vec_masks[0];
8942 }
8943
8944 /* We should have caught mismatched types earlier. */
8945 gcc_assert (useless_type_conversion_p (vectype,
8946 TREE_TYPE (vec_oprnd)));
8947 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8948 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8949 slp_node, &gs_info, &dataref_ptr,
8950 &vec_offsets);
8951 else
8952 dataref_ptr
8953 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8954 NULL, offset, &dummy, gsi,
8955 &ptr_incr, false, bump);
8956 }
8957 else
8958 {
8959 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8960 vec_oprnd = (*gvec_oprnds[0])[j];
8961 dr_chain[0] = vec_oprnd;
8962 if (mask)
8963 vec_mask = vec_masks[j];
8964 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8965 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8966 gsi, stmt_info, bump);
8967 }
8968
8969 new_stmt = NULL;
8970 unsigned HOST_WIDE_INT align;
8971 tree final_mask = NULL_TREE;
8972 tree final_len = NULL_TREE;
8973 tree bias = NULL_TREE;
8974 if (loop_masks)
8975 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8976 ncopies, vectype, j);
8977 if (vec_mask)
8978 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8979 vec_mask, gsi);
8980
8981 if (gs_info.ifn != IFN_LAST)
8982 {
8983 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8984 vec_offset = vec_offsets[j];
8985 tree scale = size_int (gs_info.scale);
8986
8987 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
8988 {
8989 if (loop_lens)
8990 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8991 ncopies, vectype, j, 1);
8992 else
8993 final_len = build_int_cst (sizetype,
8994 TYPE_VECTOR_SUBPARTS (vectype));
8995 signed char biasval
8996 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8997 bias = build_int_cst (intQI_type_node, biasval);
8998 if (!final_mask)
8999 {
9000 mask_vectype = truth_type_for (vectype);
9001 final_mask = build_minus_one_cst (mask_vectype);
9002 }
9003 }
9004
9005 gcall *call;
9006 if (final_len && final_mask)
9007 call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9008 7, dataref_ptr, vec_offset,
9009 scale, vec_oprnd, final_mask,
9010 final_len, bias);
9011 else if (final_mask)
9012 call
9013 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
9014 dataref_ptr, vec_offset, scale,
9015 vec_oprnd, final_mask);
9016 else
9017 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9018 dataref_ptr, vec_offset,
9019 scale, vec_oprnd);
9020 gimple_call_set_nothrow (call, true);
9021 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9022 new_stmt = call;
9023 }
9024 else
9025 {
9026 /* Emulated scatter. */
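/* Rough sketch of what the per-lane loop below emits (illustrative):
     idx = BIT_FIELD_REF <vec_offset, idx_size, lane * idx_size>;
     ptr = dataref_ptr + (sizetype) idx * scale;
     elt = BIT_FIELD_REF <vec_oprnd, elt_size, lane * elt_size>;
     MEM[(ltype *) ptr] = elt;
   with LANE biased by ELT_OFFSET when the offset vector is wider than
   the data vector. */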
9027 gcc_assert (!final_mask);
9028 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9029 unsigned HOST_WIDE_INT const_offset_nunits
9030 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9031 vec<constructor_elt, va_gc> *ctor_elts;
9032 vec_alloc (ctor_elts, const_nunits);
9033 gimple_seq stmts = NULL;
9034 tree elt_type = TREE_TYPE (vectype);
9035 unsigned HOST_WIDE_INT elt_size
9036 = tree_to_uhwi (TYPE_SIZE (elt_type));
9037 /* We support offset vectors with more elements
9038 than the data vector for now. */
9039 unsigned HOST_WIDE_INT factor
9040 = const_offset_nunits / const_nunits;
9041 vec_offset = vec_offsets[j / factor];
9042 unsigned elt_offset = (j % factor) * const_nunits;
9043 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9044 tree scale = size_int (gs_info.scale);
9045 align = get_object_alignment (DR_REF (first_dr_info->dr));
9046 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9047 for (unsigned k = 0; k < const_nunits; ++k)
9048 {
9049 /* Compute the offsetted pointer. */
9050 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9051 bitsize_int (k + elt_offset));
9052 tree idx
9053 = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
9054 TYPE_SIZE (idx_type), boff);
9055 idx = gimple_convert (&stmts, sizetype, idx);
9056 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
9057 tree ptr
9058 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
9059 dataref_ptr, idx);
9060 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9061 /* Extract the element to be stored. */
9062 tree elt
9063 = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
9064 vec_oprnd, TYPE_SIZE (elt_type),
9065 bitsize_int (k * elt_size));
9066 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9067 stmts = NULL;
9068 tree ref
9069 = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
9070 new_stmt = gimple_build_assign (ref, elt);
9071 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9072 }
9073 }
9074 if (j == 0)
9075 *vec_stmt = new_stmt;
9076 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9077 }
9078 return true;
9079 }
9080
9081 auto_vec<tree> result_chain (group_size);
9082 auto_vec<tree, 1> vec_oprnds;
9083 for (j = 0; j < ncopies; j++)
9084 {
9085 gimple *new_stmt;
9086 if (j == 0)
9087 {
9088 if (slp)
9089 {
9090 /* Get vectorized arguments for SLP_NODE. */
9091 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9092 &vec_oprnds, mask, &vec_masks);
9093 vec_oprnd = vec_oprnds[0];
9094 if (mask)
9095 vec_mask = vec_masks[0];
9096 }
9097 else
9098 {
9099 /* For interleaved stores we collect vectorized defs for all the
9100 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9101 input to vect_permute_store_chain().
9102
9103 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9104 is of size 1. */
9105 stmt_vec_info next_stmt_info = first_stmt_info;
9106 for (i = 0; i < group_size; i++)
9107 {
9108 /* Since gaps are not supported for interleaved stores,
9109 DR_GROUP_SIZE is the exact number of stmts in the chain.
9110 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9111 that there is no interleaving, DR_GROUP_SIZE is 1,
9112 and only one iteration of the loop will be executed. */
9113 op = vect_get_store_rhs (next_stmt_info);
9114 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
9115 op, gvec_oprnds[i]);
9116 vec_oprnd = (*gvec_oprnds[i])[0];
9117 dr_chain.quick_push (vec_oprnd);
9118 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9119 }
9120 if (mask)
9121 {
9122 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9123 mask, &vec_masks,
9124 mask_vectype);
9125 vec_mask = vec_masks[0];
9126 }
9127 }
9128
9129 /* We should have caught mismatched types earlier. */
9130 gcc_assert (useless_type_conversion_p (vectype,
9131 TREE_TYPE (vec_oprnd)));
9132 bool simd_lane_access_p
9133 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9134 if (simd_lane_access_p
9135 && !loop_masks
9136 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9137 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9138 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9139 && integer_zerop (DR_INIT (first_dr_info->dr))
9140 && alias_sets_conflict_p (get_alias_set (aggr_type),
9141 get_alias_set (TREE_TYPE (ref_type))))
9142 {
9143 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9144 dataref_offset = build_int_cst (ref_type, 0);
9145 }
9146 else
9147 dataref_ptr
9148 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9149 simd_lane_access_p ? loop : NULL,
9150 offset, &dummy, gsi, &ptr_incr,
9151 simd_lane_access_p, bump);
9152 }
9153 else
9154 {
9155 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9156 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9157 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9158 of size 1. */
9159 for (i = 0; i < group_size; i++)
9160 {
9161 vec_oprnd = (*gvec_oprnds[i])[j];
9162 dr_chain[i] = vec_oprnd;
9163 }
9164 if (mask)
9165 vec_mask = vec_masks[j];
9166 if (dataref_offset)
9167 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9168 else
9169 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9170 stmt_info, bump);
9171 }
9172
9173 new_stmt = NULL;
9174 if (grouped_store)
9175 /* Permute. */
9176 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
9177 &result_chain);
9178
9179 stmt_vec_info next_stmt_info = first_stmt_info;
9180 for (i = 0; i < vec_num; i++)
9181 {
9182 unsigned misalign;
9183 unsigned HOST_WIDE_INT align;
9184
9185 tree final_mask = NULL_TREE;
9186 tree final_len = NULL_TREE;
9187 tree bias = NULL_TREE;
9188 if (loop_masks)
9189 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9190 vec_num * ncopies, vectype,
9191 vec_num * j + i);
9192 if (slp && vec_mask)
9193 vec_mask = vec_masks[i];
9194 if (vec_mask)
9195 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9196 vec_mask, gsi);
9197
9198 if (i > 0)
9199 /* Bump the vector pointer. */
9200 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9201 stmt_info, bump);
9202
9203 if (slp)
9204 vec_oprnd = vec_oprnds[i];
9205 else if (grouped_store)
9206 /* For grouped stores vectorized defs are interleaved in
9207 vect_permute_store_chain(). */
9208 vec_oprnd = result_chain[i];
9209
9210 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9211 if (alignment_support_scheme == dr_aligned)
9212 misalign = 0;
9213 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9214 {
9215 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9216 misalign = 0;
9217 }
9218 else
9219 misalign = misalignment;
9220 if (dataref_offset == NULL_TREE
9221 && TREE_CODE (dataref_ptr) == SSA_NAME)
9222 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9223 misalign);
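/* The alignment we can rely on for this access: the lowest set bit of
   MISALIGN | ALIGN, i.e. the target alignment capped by any known
   misalignment offset. */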
9224 align = least_bit_hwi (misalign | align);
9225
9226 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9227 {
9228 tree perm_mask = perm_mask_for_reverse (vectype);
9229 tree perm_dest
9230 = vect_create_destination_var (vect_get_store_rhs (stmt_info),
9231 vectype);
9232 tree new_temp = make_ssa_name (perm_dest);
9233
9234 /* Generate the permute statement. */
9235 gimple *perm_stmt
9236 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9237 vec_oprnd, perm_mask);
9238 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9239
9240 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9241 vec_oprnd = new_temp;
9242 }
9243
9244 /* Determine which IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9245 machine_mode vmode = TYPE_MODE (vectype);
9246 machine_mode new_vmode = vmode;
9247 internal_fn partial_ifn = IFN_LAST;
9248 if (loop_lens)
9249 {
9250 opt_machine_mode new_ovmode
9251 = get_len_load_store_mode (vmode, false, &partial_ifn);
9252 new_vmode = new_ovmode.require ();
9253 unsigned factor
9254 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9255 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9256 vec_num * ncopies, vectype,
9257 vec_num * j + i, factor);
9258 }
9259 else if (final_mask)
9260 {
9261 if (!can_vec_mask_load_store_p (
9262 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9263 &partial_ifn))
9264 gcc_unreachable ();
9265 }
9266
9267 if (partial_ifn == IFN_MASK_LEN_STORE)
9268 {
9269 if (!final_len)
9270 {
9271 /* Pass VF value to 'len' argument of
9272 MASK_LEN_STORE if LOOP_LENS is invalid. */
9273 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9274 }
9275 if (!final_mask)
9276 {
9277 /* Pass all ones value to 'mask' argument of
9278 MASK_LEN_STORE if final_mask is invalid. */
9279 mask_vectype = truth_type_for (vectype);
9280 final_mask = build_minus_one_cst (mask_vectype);
9281 }
9282 }
9283 if (final_len)
9284 {
9285 signed char biasval
9286 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9287
9288 bias = build_int_cst (intQI_type_node, biasval);
9289 }
9290
9291 /* Arguments are ready. Create the new vector stmt. */
9292 if (final_len)
9293 {
9294 gcall *call;
9295 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9296 /* Need conversion if it's wrapped with VnQI. */
9297 if (vmode != new_vmode)
9298 {
9299 tree new_vtype
9300 = build_vector_type_for_mode (unsigned_intQI_type_node,
9301 new_vmode);
9302 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9303 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9304 gassign *new_stmt
9305 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9306 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9307 vec_oprnd = var;
9308 }
9309
9310 if (partial_ifn == IFN_MASK_LEN_STORE)
9311 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9312 dataref_ptr, ptr, final_mask,
9313 final_len, bias, vec_oprnd);
9314 else
9315 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9316 dataref_ptr, ptr, final_len,
9317 bias, vec_oprnd);
9318 gimple_call_set_nothrow (call, true);
9319 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9320 new_stmt = call;
9321 }
9322 else if (final_mask)
9323 {
9324 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9325 gcall *call
9326 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9327 ptr, final_mask, vec_oprnd);
9328 gimple_call_set_nothrow (call, true);
9329 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9330 new_stmt = call;
9331 }
9332 else
9333 {
9334 data_ref
9335 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9336 dataref_offset ? dataref_offset
9337 : build_int_cst (ref_type, 0));
9338 if (alignment_support_scheme == dr_aligned)
9339 ;
9340 else
9341 TREE_TYPE (data_ref)
9342 = build_aligned_type (TREE_TYPE (data_ref),
9343 align * BITS_PER_UNIT);
9344 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9345 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9346 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9347 }
9348
9349 if (slp)
9350 continue;
9351
9352 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9353 if (!next_stmt_info)
9354 break;
9355 }
9356 if (!slp)
9357 {
9358 if (j == 0)
9359 *vec_stmt = new_stmt;
9360 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9361 }
9362 }
9363
9364 return true;
9365 }
9366
9367 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9368 VECTOR_CST mask. No checks are made that the target platform supports the
9369 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9370 vect_gen_perm_mask_checked. */
9371
9372 tree
9373 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9374 {
9375 tree mask_type;
9376
9377 poly_uint64 nunits = sel.length ();
9378 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9379
9380 mask_type = build_vector_type (ssizetype, nunits);
9381 return vec_perm_indices_to_tree (mask_type, sel);
9382 }
9383
9384 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9385 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9386
9387 tree
9388 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9389 {
9390 machine_mode vmode = TYPE_MODE (vectype);
9391 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9392 return vect_gen_perm_mask_any (vectype, sel);
9393 }
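/* Usage sketch (illustrative, not part of the original file): reversing a
   four-lane vector amounts to filling a vec_perm_indices SEL with
   { 3, 2, 1, 0 }, calling vect_gen_perm_mask_checked and using the
   resulting VECTOR_CST as the third operand of a VEC_PERM_EXPR, as the
   VMAT_CONTIGUOUS_REVERSE store path above does. */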
9394
9395 /* Given vector variables X and Y that were generated for the scalar
9396 STMT_INFO, generate instructions to permute the vector elements of X and Y
9397 using permutation mask MASK_VEC, insert them at *GSI and return the
9398 permuted vector variable. */
9399
9400 static tree
9401 permute_vec_elements (vec_info *vinfo,
9402 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9403 gimple_stmt_iterator *gsi)
9404 {
9405 tree vectype = TREE_TYPE (x);
9406 tree perm_dest, data_ref;
9407 gimple *perm_stmt;
9408
9409 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9410 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9411 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9412 else
9413 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9414 data_ref = make_ssa_name (perm_dest);
9415
9416 /* Generate the permute statement. */
9417 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9418 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9419
9420 return data_ref;
9421 }
9422
9423 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9424 inserting them on the loop's preheader edge. Returns true if we
9425 were successful in doing so (and thus STMT_INFO can then be moved),
9426 otherwise returns false. HOIST_P indicates whether we actually want to
9427 hoist the definitions of all SSA uses; it is false when we are only costing. */
9428
9429 static bool
9430 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9431 {
9432 ssa_op_iter i;
9433 tree op;
9434 bool any = false;
9435
9436 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9437 {
9438 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9439 if (!gimple_nop_p (def_stmt)
9440 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9441 {
9442 /* Make sure we don't need to recurse. While we could do
9443 so in simple cases, when there are more complex use webs
9444 we don't have an easy way to preserve stmt order to fulfil
9445 dependencies within them. */
9446 tree op2;
9447 ssa_op_iter i2;
9448 if (gimple_code (def_stmt) == GIMPLE_PHI)
9449 return false;
9450 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9451 {
9452 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9453 if (!gimple_nop_p (def_stmt2)
9454 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9455 return false;
9456 }
9457 any = true;
9458 }
9459 }
9460
9461 if (!any)
9462 return true;
9463
9464 if (!hoist_p)
9465 return true;
9466
9467 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9468 {
9469 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9470 if (!gimple_nop_p (def_stmt)
9471 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9472 {
9473 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9474 gsi_remove (&gsi, false);
9475 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9476 }
9477 }
9478
9479 return true;
9480 }
9481
9482 /* vectorizable_load.
9483
9484 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9485 that can be vectorized.
9486 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9487 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9488 Return true if STMT_INFO is vectorizable in this way. */
9489
9490 static bool
9491 vectorizable_load (vec_info *vinfo,
9492 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9493 gimple **vec_stmt, slp_tree slp_node,
9494 stmt_vector_for_cost *cost_vec)
9495 {
9496 tree scalar_dest;
9497 tree vec_dest = NULL;
9498 tree data_ref = NULL;
9499 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9500 class loop *loop = NULL;
9501 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9502 bool nested_in_vect_loop = false;
9503 tree elem_type;
9504 /* Avoid false positive uninitialized warning, see PR110652. */
9505 tree new_temp = NULL_TREE;
9506 machine_mode mode;
9507 tree dummy;
9508 tree dataref_ptr = NULL_TREE;
9509 tree dataref_offset = NULL_TREE;
9510 gimple *ptr_incr = NULL;
9511 int ncopies;
9512 int i, j;
9513 unsigned int group_size;
9514 poly_uint64 group_gap_adj;
9515 tree msq = NULL_TREE, lsq;
9516 tree realignment_token = NULL_TREE;
9517 gphi *phi = NULL;
9518 vec<tree> dr_chain = vNULL;
9519 bool grouped_load = false;
9520 stmt_vec_info first_stmt_info;
9521 stmt_vec_info first_stmt_info_for_drptr = NULL;
9522 bool compute_in_loop = false;
9523 class loop *at_loop;
9524 int vec_num;
9525 bool slp = (slp_node != NULL);
9526 bool slp_perm = false;
9527 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9528 poly_uint64 vf;
9529 tree aggr_type;
9530 gather_scatter_info gs_info;
9531 tree ref_type;
9532 enum vect_def_type mask_dt = vect_unknown_def_type;
9533
9534 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9535 return false;
9536
9537 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9538 && ! vec_stmt)
9539 return false;
9540
9541 if (!STMT_VINFO_DATA_REF (stmt_info))
9542 return false;
9543
9544 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9545 int mask_index = -1;
9546 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9547 {
9548 scalar_dest = gimple_assign_lhs (assign);
9549 if (TREE_CODE (scalar_dest) != SSA_NAME)
9550 return false;
9551
9552 tree_code code = gimple_assign_rhs_code (assign);
9553 if (code != ARRAY_REF
9554 && code != BIT_FIELD_REF
9555 && code != INDIRECT_REF
9556 && code != COMPONENT_REF
9557 && code != IMAGPART_EXPR
9558 && code != REALPART_EXPR
9559 && code != MEM_REF
9560 && TREE_CODE_CLASS (code) != tcc_declaration)
9561 return false;
9562 }
9563 else
9564 {
9565 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9566 if (!call || !gimple_call_internal_p (call))
9567 return false;
9568
9569 internal_fn ifn = gimple_call_internal_fn (call);
9570 if (!internal_load_fn_p (ifn))
9571 return false;
9572
9573 scalar_dest = gimple_call_lhs (call);
9574 if (!scalar_dest)
9575 return false;
9576
9577 mask_index = internal_fn_mask_index (ifn);
9578 if (mask_index >= 0 && slp_node)
9579 mask_index = vect_slp_child_index_for_operand (call, mask_index);
9580 if (mask_index >= 0
9581 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9582 &mask, NULL, &mask_dt, &mask_vectype))
9583 return false;
9584 }
9585
9586 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9587 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9588
9589 if (loop_vinfo)
9590 {
9591 loop = LOOP_VINFO_LOOP (loop_vinfo);
9592 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9593 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9594 }
9595 else
9596 vf = 1;
9597
9598 /* Multiple types in SLP are handled by creating the appropriate number of
9599 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9600 case of SLP. */
9601 if (slp)
9602 ncopies = 1;
9603 else
9604 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9605
9606 gcc_assert (ncopies >= 1);
9607
9608 /* FORNOW. This restriction should be relaxed. */
9609 if (nested_in_vect_loop && ncopies > 1)
9610 {
9611 if (dump_enabled_p ())
9612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9613 "multiple types in nested loop.\n");
9614 return false;
9615 }
9616
9617 /* Invalidate assumptions made by dependence analysis when vectorization
9618 on the unrolled body effectively re-orders stmts. */
9619 if (ncopies > 1
9620 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9621 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9622 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9623 {
9624 if (dump_enabled_p ())
9625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9626 "cannot perform implicit CSE when unrolling "
9627 "with negative dependence distance\n");
9628 return false;
9629 }
9630
9631 elem_type = TREE_TYPE (vectype);
9632 mode = TYPE_MODE (vectype);
9633
9634 /* FORNOW. In some cases we can vectorize even if the data type is not
9635 supported (e.g. data copies). */
9636 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9637 {
9638 if (dump_enabled_p ())
9639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9640 "Aligned load, but unsupported type.\n");
9641 return false;
9642 }
9643
9644 /* Check if the load is a part of an interleaving chain. */
9645 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9646 {
9647 grouped_load = true;
9648 /* FORNOW */
9649 gcc_assert (!nested_in_vect_loop);
9650 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9651
9652 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9653 group_size = DR_GROUP_SIZE (first_stmt_info);
9654
9655 /* Refuse non-SLP vectorization of SLP-only groups. */
9656 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9657 {
9658 if (dump_enabled_p ())
9659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9660 "cannot vectorize load in non-SLP mode.\n");
9661 return false;
9662 }
9663
9664 /* Invalidate assumptions made by dependence analysis when vectorization
9665 on the unrolled body effectively re-orders stmts. */
9666 if (!PURE_SLP_STMT (stmt_info)
9667 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9668 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9669 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9670 {
9671 if (dump_enabled_p ())
9672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9673 "cannot perform implicit CSE when performing "
9674 "group loads with negative dependence distance\n");
9675 return false;
9676 }
9677 }
9678 else
9679 group_size = 1;
9680
9681 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9682 {
9683 slp_perm = true;
9684
9685 if (!loop_vinfo)
9686 {
9687 /* In BB vectorization we may not actually use a loaded vector
9688 accessing elements in excess of DR_GROUP_SIZE. */
9689 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9690 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9691 unsigned HOST_WIDE_INT nunits;
9692 unsigned j, k, maxk = 0;
9693 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9694 if (k > maxk)
9695 maxk = k;
9696 tree vectype = SLP_TREE_VECTYPE (slp_node);
9697 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9698 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9699 {
9700 if (dump_enabled_p ())
9701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9702 "BB vectorization with gaps at the end of "
9703 "a load is not supported\n");
9704 return false;
9705 }
9706 }
9707
9708 auto_vec<tree> tem;
9709 unsigned n_perms;
9710 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9711 true, &n_perms))
9712 {
9713 if (dump_enabled_p ())
9714 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9715 vect_location,
9716 "unsupported load permutation\n");
9717 return false;
9718 }
9719 }
9720
9721 vect_memory_access_type memory_access_type;
9722 enum dr_alignment_support alignment_support_scheme;
9723 int misalignment;
9724 poly_int64 poffset;
9725 internal_fn lanes_ifn;
9726 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9727 ncopies, &memory_access_type, &poffset,
9728 &alignment_support_scheme, &misalignment, &gs_info,
9729 &lanes_ifn))
9730 return false;
9731
9732 if (mask)
9733 {
9734 if (memory_access_type == VMAT_CONTIGUOUS)
9735 {
9736 machine_mode vec_mode = TYPE_MODE (vectype);
9737 if (!VECTOR_MODE_P (vec_mode)
9738 || !can_vec_mask_load_store_p (vec_mode,
9739 TYPE_MODE (mask_vectype), true))
9740 return false;
9741 }
9742 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9743 && memory_access_type != VMAT_GATHER_SCATTER)
9744 {
9745 if (dump_enabled_p ())
9746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9747 "unsupported access type for masked load.\n");
9748 return false;
9749 }
9750 else if (memory_access_type == VMAT_GATHER_SCATTER
9751 && gs_info.ifn == IFN_LAST
9752 && !gs_info.decl)
9753 {
9754 if (dump_enabled_p ())
9755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9756 "unsupported masked emulated gather.\n");
9757 return false;
9758 }
9759 }
9760
9761 bool costing_p = !vec_stmt;
9762
9763 if (costing_p) /* transformation not required. */
9764 {
9765 if (slp_node
9766 && mask
9767 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9768 mask_vectype))
9769 {
9770 if (dump_enabled_p ())
9771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9772 "incompatible vector types for invariants\n");
9773 return false;
9774 }
9775
9776 if (!slp)
9777 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9778
9779 if (loop_vinfo
9780 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9781 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9782 VLS_LOAD, group_size,
9783 memory_access_type, &gs_info,
9784 mask);
9785
9786 if (dump_enabled_p ()
9787 && memory_access_type != VMAT_ELEMENTWISE
9788 && memory_access_type != VMAT_GATHER_SCATTER
9789 && alignment_support_scheme != dr_aligned)
9790 dump_printf_loc (MSG_NOTE, vect_location,
9791 "Vectorizing an unaligned access.\n");
9792
9793 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9794 vinfo->any_known_not_updated_vssa = true;
9795
9796 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9797 }
9798
9799 if (!slp)
9800 gcc_assert (memory_access_type
9801 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9802
9803 if (dump_enabled_p () && !costing_p)
9804 dump_printf_loc (MSG_NOTE, vect_location,
9805 "transform load. ncopies = %d\n", ncopies);
9806
9807 /* Transform. */
9808
9809 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9810 ensure_base_align (dr_info);
9811
9812 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9813 {
9814 vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
9815 mask, cost_vec);
9816 return true;
9817 }
9818
9819 if (memory_access_type == VMAT_INVARIANT)
9820 {
9821 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9822 /* If we have versioned for aliasing or the loop doesn't
9823 have any data dependencies that would preclude this,
9824 then we are sure this is a loop invariant load and
9825 thus we can insert it on the preheader edge.
9826 TODO: hoist_defs_of_uses should ideally be computed
9827 once at analysis time, remembered and used at
9828 transform time. */
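/* For instance (illustrative): for an invariant load x = *p whose address
   computation can be hoisted, the scalar load is emitted on the preheader
   edge and the loaded value is then broadcast into a vector by
   vect_init_vector; otherwise the broadcast happens inside the loop. */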
9829 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9830 && !nested_in_vect_loop
9831 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
9832 if (costing_p)
9833 {
9834 enum vect_cost_model_location cost_loc
9835 = hoist_p ? vect_prologue : vect_body;
9836 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
9837 stmt_info, 0, cost_loc);
9838 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
9839 cost_loc);
9840 unsigned int prologue_cost = hoist_p ? cost : 0;
9841 unsigned int inside_cost = hoist_p ? 0 : cost;
9842 if (dump_enabled_p ())
9843 dump_printf_loc (MSG_NOTE, vect_location,
9844 "vect_model_load_cost: inside_cost = %d, "
9845 "prologue_cost = %d .\n",
9846 inside_cost, prologue_cost);
9847 return true;
9848 }
9849 if (hoist_p)
9850 {
9851 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9852 if (dump_enabled_p ())
9853 dump_printf_loc (MSG_NOTE, vect_location,
9854 "hoisting out of the vectorized loop: %G",
9855 (gimple *) stmt);
9856 scalar_dest = copy_ssa_name (scalar_dest);
9857 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9858 edge pe = loop_preheader_edge (loop);
9859 gphi *vphi = get_virtual_phi (loop->header);
9860 tree vuse;
9861 if (vphi)
9862 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9863 else
9864 vuse = gimple_vuse (gsi_stmt (*gsi));
9865 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9866 gimple_set_vuse (new_stmt, vuse);
9867 gsi_insert_on_edge_immediate (pe, new_stmt);
9868 }
9869 /* These copies are all equivalent. */
9870 if (hoist_p)
9871 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9872 vectype, NULL);
9873 else
9874 {
9875 gimple_stmt_iterator gsi2 = *gsi;
9876 gsi_next (&gsi2);
9877 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9878 vectype, &gsi2);
9879 }
9880 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9881 if (slp)
9882 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
9883 slp_node->push_vec_def (new_stmt);
9884 else
9885 {
9886 for (j = 0; j < ncopies; ++j)
9887 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9888 *vec_stmt = new_stmt;
9889 }
9890 return true;
9891 }
9892
9893 if (memory_access_type == VMAT_ELEMENTWISE
9894 || memory_access_type == VMAT_STRIDED_SLP)
9895 {
9896 gimple_stmt_iterator incr_gsi;
9897 bool insert_after;
9898 tree offvar;
9899 tree ivstep;
9900 tree running_off;
9901 vec<constructor_elt, va_gc> *v = NULL;
9902 tree stride_base, stride_step, alias_off;
9903 /* Checked by get_load_store_type. */
9904 unsigned int const_nunits = nunits.to_constant ();
9905 unsigned HOST_WIDE_INT cst_offset = 0;
9906 tree dr_offset;
9907 unsigned int inside_cost = 0;
9908
9909 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9910 gcc_assert (!nested_in_vect_loop);
9911
9912 if (grouped_load)
9913 {
9914 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9915 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9916 }
9917 else
9918 {
9919 first_stmt_info = stmt_info;
9920 first_dr_info = dr_info;
9921 }
9922
9923 if (slp && grouped_load)
9924 {
9925 group_size = DR_GROUP_SIZE (first_stmt_info);
9926 ref_type = get_group_alias_ptr_type (first_stmt_info);
9927 }
9928 else
9929 {
9930 if (grouped_load)
9931 cst_offset
9932 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9933 * vect_get_place_in_interleaving_chain (stmt_info,
9934 first_stmt_info));
9935 group_size = 1;
9936 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9937 }
9938
9939 if (!costing_p)
9940 {
9941 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9942 stride_base = fold_build_pointer_plus (
9943 DR_BASE_ADDRESS (first_dr_info->dr),
9944 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
9945 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9946 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9947
9948 /* For a load with loop-invariant (but other than power-of-2)
9949 stride (i.e. not a grouped access) like so:
9950
9951 for (i = 0; i < n; i += stride)
9952 ... = array[i];
9953
9954 we generate a new induction variable and new accesses to
9955 form a new vector (or vectors, depending on ncopies):
9956
9957 for (j = 0; ; j += VF*stride)
9958 tmp1 = array[j];
9959 tmp2 = array[j + stride];
9960 ...
9961 vectemp = {tmp1, tmp2, ...}
9962 */
9963
9964 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9965 build_int_cst (TREE_TYPE (stride_step), vf));
9966
9967 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9968
9969 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9970 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9971 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9972 loop, &incr_gsi, insert_after,
9973 &offvar, NULL);
9974
9975 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9976 }
9977
9978 running_off = offvar;
9979 alias_off = build_int_cst (ref_type, 0);
9980 int nloads = const_nunits;
9981 int lnel = 1;
9982 tree ltype = TREE_TYPE (vectype);
9983 tree lvectype = vectype;
9984 auto_vec<tree> dr_chain;
9985 if (memory_access_type == VMAT_STRIDED_SLP)
9986 {
9987 if (group_size < const_nunits)
9988 {
9989 /* First check if vec_init optab supports construction from vector
9990 elts directly. Otherwise avoid emitting a constructor of
9991 vector elements by performing the loads using an integer type
9992 of the same size, constructing a vector of those and then
9993 re-interpreting it as the original vector type. This avoids a
9994 huge runtime penalty due to the general inability to perform
9995 store forwarding from smaller stores to a larger load. */
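/* Possible outcome (illustrative): for a V8HI VECTYPE and GROUP_SIZE == 2,
   vector_vector_composition_type may return PTYPE == SImode pieces with a
   V4SI composition type, in which case we emit four SImode loads, collect
   them in a V4SI CONSTRUCTOR and VIEW_CONVERT the result back to V8HI. */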
9996 tree ptype;
9997 tree vtype
9998 = vector_vector_composition_type (vectype,
9999 const_nunits / group_size,
10000 &ptype);
10001 if (vtype != NULL_TREE)
10002 {
10003 nloads = const_nunits / group_size;
10004 lnel = group_size;
10005 lvectype = vtype;
10006 ltype = ptype;
10007 }
10008 }
10009 else
10010 {
10011 nloads = 1;
10012 lnel = const_nunits;
10013 ltype = vectype;
10014 }
10015 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10016 }
10017 /* Load vector(1) scalar_type if the vectype holds just one element. */
10018 else if (nloads == 1)
10019 ltype = vectype;
10020
10021 if (slp)
10022 {
10023 /* For SLP permutation support we need to load the whole group,
10024 not only the number of vector stmts the permutation result
10025 fits in. */
10026 if (slp_perm)
10027 {
10028 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10029 variable VF. */
10030 unsigned int const_vf = vf.to_constant ();
10031 ncopies = CEIL (group_size * const_vf, const_nunits);
10032 dr_chain.create (ncopies);
10033 }
10034 else
10035 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10036 }
10037 unsigned int group_el = 0;
10038 unsigned HOST_WIDE_INT
10039 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10040 unsigned int n_groups = 0;
10041 for (j = 0; j < ncopies; j++)
10042 {
10043 if (nloads > 1 && !costing_p)
10044 vec_alloc (v, nloads);
10045 gimple *new_stmt = NULL;
10046 for (i = 0; i < nloads; i++)
10047 {
10048 if (costing_p)
10049 {
10050 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10051 avoid ICE, see PR110776. */
10052 if (VECTOR_TYPE_P (ltype)
10053 && memory_access_type != VMAT_ELEMENTWISE)
10054 vect_get_load_cost (vinfo, stmt_info, 1,
10055 alignment_support_scheme, misalignment,
10056 false, &inside_cost, nullptr, cost_vec,
10057 cost_vec, true);
10058 else
10059 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10060 stmt_info, 0, vect_body);
10061 continue;
10062 }
10063 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10064 group_el * elsz + cst_offset);
10065 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10066 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10067 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10068 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10069 if (nloads > 1)
10070 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10071 gimple_assign_lhs (new_stmt));
10072
10073 group_el += lnel;
10074 if (! slp
10075 || group_el == group_size)
10076 {
10077 n_groups++;
10078 /* When doing SLP make sure not to load elements from
10079 the next vector iteration; those will not be accessed,
10080 so just use the last element again. See PR107451. */
10081 if (!slp || known_lt (n_groups, vf))
10082 {
10083 tree newoff = copy_ssa_name (running_off);
10084 gimple *incr
10085 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10086 running_off, stride_step);
10087 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10088 running_off = newoff;
10089 }
10090 group_el = 0;
10091 }
10092 }
10093
10094 if (nloads > 1)
10095 {
10096 if (costing_p)
10097 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10098 stmt_info, 0, vect_body);
10099 else
10100 {
10101 tree vec_inv = build_constructor (lvectype, v);
10102 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10103 lvectype, gsi);
10104 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10105 if (lvectype != vectype)
10106 {
10107 new_stmt
10108 = gimple_build_assign (make_ssa_name (vectype),
10109 VIEW_CONVERT_EXPR,
10110 build1 (VIEW_CONVERT_EXPR,
10111 vectype, new_temp));
10112 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10113 gsi);
10114 }
10115 }
10116 }
10117
10118 if (!costing_p)
10119 {
10120 if (slp)
10121 {
10122 if (slp_perm)
10123 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10124 else
10125 slp_node->push_vec_def (new_stmt);
10126 }
10127 else
10128 {
10129 if (j == 0)
10130 *vec_stmt = new_stmt;
10131 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10132 }
10133 }
10134 }
10135 if (slp_perm)
10136 {
10137 unsigned n_perms;
10138 if (costing_p)
10139 {
10140 unsigned n_loads;
10141 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10142 true, &n_perms, &n_loads);
10143 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10144 first_stmt_info, 0, vect_body);
10145 }
10146 else
10147 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10148 false, &n_perms);
10149 }
10150
10151 if (costing_p && dump_enabled_p ())
10152 dump_printf_loc (MSG_NOTE, vect_location,
10153 "vect_model_load_cost: inside_cost = %u, "
10154 "prologue_cost = 0 .\n",
10155 inside_cost);
10156
10157 return true;
10158 }
10159
10160 if (memory_access_type == VMAT_GATHER_SCATTER
10161 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10162 grouped_load = false;
10163
10164 if (grouped_load
10165 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10166 {
10167 if (grouped_load)
10168 {
10169 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10170 group_size = DR_GROUP_SIZE (first_stmt_info);
10171 }
10172 else
10173 {
10174 first_stmt_info = stmt_info;
10175 group_size = 1;
10176 }
10177 /* For SLP vectorization we directly vectorize a subchain
10178 without permutation. */
10179 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10180 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10181 /* For BB vectorization always use the first stmt to base
10182 the data ref pointer on. */
10183 if (bb_vinfo)
10184 first_stmt_info_for_drptr
10185 = vect_find_first_scalar_stmt_in_slp (slp_node);
10186
10187 /* Check if the chain of loads is already vectorized. */
10188 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10189 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10190 ??? But we can only do so if there is exactly one,
10191 as we have no way to get at the rest. Leave the CSE
10192 opportunity alone.
10193 ??? With the group load eventually participating
10194 in multiple different permutations (having multiple
10195 slp nodes which refer to the same group) the CSE
10196 would even produce wrong code. See PR56270. */
10197 && !slp)
10198 {
10199 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10200 return true;
10201 }
10202 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10203 group_gap_adj = 0;
10204
10205 /* VEC_NUM is the number of vect stmts to be created for this group. */
10206 if (slp)
10207 {
10208 grouped_load = false;
10209 /* If an SLP permutation is from N elements to N elements,
10210 and if one vector holds a whole number of N, we can load
10211 the inputs to the permutation in the same way as an
10212 unpermuted sequence. In other cases we need to load the
10213 whole group, not only the number of vector stmts the
10214 permutation result fits in. */
10215 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10216 if (slp_perm
10217 && (group_size != scalar_lanes
10218 || !multiple_p (nunits, group_size)))
10219 {
10220 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10221 variable VF; see vect_transform_slp_perm_load. */
10222 unsigned int const_vf = vf.to_constant ();
10223 unsigned int const_nunits = nunits.to_constant ();
10224 vec_num = CEIL (group_size * const_vf, const_nunits);
10225 group_gap_adj = vf * group_size - nunits * vec_num;
10226 }
10227 else
10228 {
10229 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10230 group_gap_adj
10231 = group_size - scalar_lanes;
10232 }
10233 }
10234 else
10235 vec_num = group_size;
10236
10237 ref_type = get_group_alias_ptr_type (first_stmt_info);
10238 }
10239 else
10240 {
10241 first_stmt_info = stmt_info;
10242 first_dr_info = dr_info;
10243 group_size = vec_num = 1;
10244 group_gap_adj = 0;
10245 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10246 if (slp)
10247 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10248 }
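/* For illustration only (hypothetical numbers): for an SLP node with
   SLP_TREE_LANES == 2 reading from a group of size 3 with nunits == 4
   and vf == 4, the permuted case above computes
   vec_num = CEIL (3 * 4, 4) == 3 and group_gap_adj = 4*3 - 4*3 == 0,
   while without a load permutation the else branch uses the node's
   number of vector stmts and group_gap_adj = 3 - 2 == 1.  */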
10249
10250 gcc_assert (alignment_support_scheme);
10251 vec_loop_masks *loop_masks
10252 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10253 ? &LOOP_VINFO_MASKS (loop_vinfo)
10254 : NULL);
10255 vec_loop_lens *loop_lens
10256 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10257 ? &LOOP_VINFO_LENS (loop_vinfo)
10258 : NULL);
10259
10260 /* Shouldn't go with length-based approach if fully masked. */
10261 gcc_assert (!loop_lens || !loop_masks);
10262
10263 /* Targets with load-lane instructions must not require explicit
10264 realignment. vect_supportable_dr_alignment always returns either
10265 dr_aligned or dr_unaligned_supported for masked operations. */
10266 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10267 && !mask
10268 && !loop_masks)
10269 || alignment_support_scheme == dr_aligned
10270 || alignment_support_scheme == dr_unaligned_supported);
10271
10272 /* In case the vectorization factor (VF) is bigger than the number
10273 of elements that we can fit in a vectype (nunits), we have to generate
10274 more than one vector stmt - i.e - we need to "unroll" the
10275 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10276 from one copy of the vector stmt to the next, in the field
10277 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10278 stages to find the correct vector defs to be used when vectorizing
10279 stmts that use the defs of the current stmt. The example below
10280 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10281 need to create 4 vectorized stmts):
10282
10283 before vectorization:
10284 RELATED_STMT VEC_STMT
10285 S1: x = memref - -
10286 S2: z = x + 1 - -
10287
10288 step 1: vectorize stmt S1:
10289 We first create the vector stmt VS1_0, and, as usual, record a
10290 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10291 Next, we create the vector stmt VS1_1, and record a pointer to
10292 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10293 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10294 stmts and pointers:
10295 RELATED_STMT VEC_STMT
10296 VS1_0: vx0 = memref0 VS1_1 -
10297 VS1_1: vx1 = memref1 VS1_2 -
10298 VS1_2: vx2 = memref2 VS1_3 -
10299 VS1_3: vx3 = memref3 - -
10300 S1: x = load - VS1_0
10301 S2: z = x + 1 - -
10302 */
10303
10304 /* In case of interleaving (non-unit grouped access):
10305
10306 S1: x2 = &base + 2
10307 S2: x0 = &base
10308 S3: x1 = &base + 1
10309 S4: x3 = &base + 3
10310
10311 Vectorized loads are created in the order of memory accesses
10312 starting from the access of the first stmt of the chain:
10313
10314 VS1: vx0 = &base
10315 VS2: vx1 = &base + vec_size*1
10316 VS3: vx3 = &base + vec_size*2
10317 VS4: vx4 = &base + vec_size*3
10318
10319 Then permutation statements are generated:
10320
10321 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10322 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10323 ...
10324
10325 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10326 (the order of the data-refs in the output of vect_permute_load_chain
10327 corresponds to the order of scalar stmts in the interleaving chain - see
10328 the documentation of vect_permute_load_chain()).
10329 The generation of permutation stmts and recording them in
10330 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10331
10332 In case of both multiple types and interleaving, the vector loads and
10333 permutation stmts above are created for every copy. The result vector
10334 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10335 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10336
10337 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10338 on a target that supports unaligned accesses (dr_unaligned_supported)
10339 we generate the following code:
10340 p = initial_addr;
10341 indx = 0;
10342 loop {
10343 p = p + indx * vectype_size;
10344 vec_dest = *(p);
10345 indx = indx + 1;
10346 }
10347
10348 Otherwise, the data reference is potentially unaligned on a target that
10349 does not support unaligned accesses (dr_explicit_realign_optimized) -
10350 then generate the following code, in which the data in each iteration is
10351 obtained by two vector loads, one from the previous iteration, and one
10352 from the current iteration:
10353 p1 = initial_addr;
10354 msq_init = *(floor(p1))
10355 p2 = initial_addr + VS - 1;
10356 realignment_token = call target_builtin;
10357 indx = 0;
10358 loop {
10359 p2 = p2 + indx * vectype_size
10360 lsq = *(floor(p2))
10361 vec_dest = realign_load (msq, lsq, realignment_token)
10362 indx = indx + 1;
10363 msq = lsq;
10364 } */
10365
10366 /* If the misalignment remains the same throughout the execution of the
10367 loop, we can create the init_addr and permutation mask at the loop
10368 preheader. Otherwise, it needs to be created inside the loop.
10369 This can only occur when vectorizing memory accesses in the inner-loop
10370 nested within an outer-loop that is being vectorized. */
10371
10372 if (nested_in_vect_loop
10373 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10374 GET_MODE_SIZE (TYPE_MODE (vectype))))
10375 {
10376 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10377 compute_in_loop = true;
10378 }
10379
10380 bool diff_first_stmt_info
10381 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10382
10383 tree offset = NULL_TREE;
10384 if ((alignment_support_scheme == dr_explicit_realign_optimized
10385 || alignment_support_scheme == dr_explicit_realign)
10386 && !compute_in_loop)
10387 {
10388 /* If we have a different first_stmt_info, we can't set up realignment
10389 here, since we can't guarantee the first_stmt_info DR has been
10390 initialized yet; instead use the first_stmt_info_for_drptr DR,
10391 bumping by the distance from the first_stmt_info DR as below. */
10392 if (!costing_p)
10393 {
10394 if (!diff_first_stmt_info)
10395 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10396 &realignment_token,
10397 alignment_support_scheme, NULL_TREE,
10398 &at_loop);
10399 if (alignment_support_scheme == dr_explicit_realign_optimized)
10400 {
10401 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10402 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10403 size_one_node);
10404 gcc_assert (!first_stmt_info_for_drptr);
10405 }
10406 }
10407 }
10408 else
10409 at_loop = loop;
10410
10411 if (!known_eq (poffset, 0))
10412 offset = (offset
10413 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10414 : size_int (poffset));
10415
10416 tree bump;
10417 tree vec_offset = NULL_TREE;
10418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10419 {
10420 aggr_type = NULL_TREE;
10421 bump = NULL_TREE;
10422 }
10423 else if (memory_access_type == VMAT_GATHER_SCATTER)
10424 {
10425 aggr_type = elem_type;
10426 if (!costing_p)
10427 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10428 &bump, &vec_offset, loop_lens);
10429 }
10430 else
10431 {
10432 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10433 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10434 else
10435 aggr_type = vectype;
10436 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10437 memory_access_type, loop_lens);
10438 }
10439
10440 auto_vec<tree> vec_offsets;
10441 auto_vec<tree> vec_masks;
10442 if (mask && !costing_p)
10443 {
10444 if (slp_node)
10445 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10446 &vec_masks);
10447 else
10448 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10449 &vec_masks, mask_vectype);
10450 }
10451
10452 tree vec_mask = NULL_TREE;
10453 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10454 {
10455 gcc_assert (alignment_support_scheme == dr_aligned
10456 || alignment_support_scheme == dr_unaligned_supported);
10457 gcc_assert (grouped_load && !slp);
10458
10459 unsigned int inside_cost = 0, prologue_cost = 0;
10460 for (j = 0; j < ncopies; j++)
10461 {
10462 if (costing_p)
10463 {
10464 /* An IFN_LOAD_LANES will load all its vector results,
10465 regardless of which ones we actually need. Account
10466 for the cost of unused results. */
10467 if (first_stmt_info == stmt_info)
10468 {
10469 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10470 stmt_vec_info next_stmt_info = first_stmt_info;
10471 do
10472 {
10473 gaps -= 1;
10474 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10475 }
10476 while (next_stmt_info);
10477 if (gaps)
10478 {
10479 if (dump_enabled_p ())
10480 dump_printf_loc (MSG_NOTE, vect_location,
10481 "vect_model_load_cost: %d "
10482 "unused vectors.\n",
10483 gaps);
10484 vect_get_load_cost (vinfo, stmt_info, gaps,
10485 alignment_support_scheme,
10486 misalignment, false, &inside_cost,
10487 &prologue_cost, cost_vec, cost_vec,
10488 true);
10489 }
10490 }
10491 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10492 misalignment, false, &inside_cost,
10493 &prologue_cost, cost_vec, cost_vec, true);
10494 continue;
10495 }
10496
10497 /* 1. Create the vector or array pointer update chain. */
10498 if (j == 0)
10499 dataref_ptr
10500 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10501 at_loop, offset, &dummy, gsi,
10502 &ptr_incr, false, bump);
10503 else
10504 {
10505 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10506 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10507 stmt_info, bump);
10508 }
10509 if (mask)
10510 vec_mask = vec_masks[j];
10511
10512 tree vec_array = create_vector_array (vectype, vec_num);
10513
10514 tree final_mask = NULL_TREE;
10515 tree final_len = NULL_TREE;
10516 tree bias = NULL_TREE;
10517 if (loop_masks)
10518 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10519 ncopies, vectype, j);
10520 if (vec_mask)
10521 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10522 vec_mask, gsi);
10523
10524 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10525 {
10526 if (loop_lens)
10527 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10528 ncopies, vectype, j, 1);
10529 else
10530 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10531 signed char biasval
10532 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10533 bias = build_int_cst (intQI_type_node, biasval);
10534 if (!final_mask)
10535 {
10536 mask_vectype = truth_type_for (vectype);
10537 final_mask = build_minus_one_cst (mask_vectype);
10538 }
10539 }
10540
10541 gcall *call;
10542 if (final_len && final_mask)
10543 {
10544 /* Emit:
10545 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10546 VEC_MASK, LEN, BIAS). */
10547 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10548 tree alias_ptr = build_int_cst (ref_type, align);
10549 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10550 dataref_ptr, alias_ptr,
10551 final_mask, final_len, bias);
10552 }
10553 else if (final_mask)
10554 {
10555 /* Emit:
10556 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10557 VEC_MASK). */
10558 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10559 tree alias_ptr = build_int_cst (ref_type, align);
10560 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10561 dataref_ptr, alias_ptr,
10562 final_mask);
10563 }
10564 else
10565 {
10566 /* Emit:
10567 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10568 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10569 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10570 }
10571 gimple_call_set_lhs (call, vec_array);
10572 gimple_call_set_nothrow (call, true);
10573 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10574
10575 dr_chain.create (vec_num);
10576 /* Extract each vector into an SSA_NAME. */
10577 for (i = 0; i < vec_num; i++)
10578 {
10579 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10580 vec_array, i);
10581 dr_chain.quick_push (new_temp);
10582 }
10583
10584 /* Record the mapping between SSA_NAMEs and statements. */
10585 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10586
10587 /* Record that VEC_ARRAY is now dead. */
10588 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10589
10590 dr_chain.release ();
10591
10592 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10593 }
10594
10595 if (costing_p && dump_enabled_p ())
10596 dump_printf_loc (MSG_NOTE, vect_location,
10597 "vect_model_load_cost: inside_cost = %u, "
10598 "prologue_cost = %u .\n",
10599 inside_cost, prologue_cost);
10600
10601 return true;
10602 }
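/* For illustration, a GIMPLE sketch of the load-lanes path above
   (names hypothetical):
     vec_array = .MASK_LEN_LOAD_LANES (dataref_ptr, alias_ptr,
                                       final_mask, final_len, bias);
     vect_x_0 = vec_array[0];
     vect_x_1 = vec_array[1];
     ...
   with vect_record_grouped_load_vectors then mapping each extracted
   vector to the corresponding group member.  */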
10603
10604 if (memory_access_type == VMAT_GATHER_SCATTER)
10605 {
10606 gcc_assert (alignment_support_scheme == dr_aligned
10607 || alignment_support_scheme == dr_unaligned_supported);
10608 gcc_assert (!grouped_load && !slp_perm);
10609
10610 unsigned int inside_cost = 0, prologue_cost = 0;
10611 for (j = 0; j < ncopies; j++)
10612 {
10613 /* 1. Create the vector or array pointer update chain. */
10614 if (j == 0 && !costing_p)
10615 {
10616 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10617 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10618 slp_node, &gs_info, &dataref_ptr,
10619 &vec_offsets);
10620 else
10621 dataref_ptr
10622 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10623 at_loop, offset, &dummy, gsi,
10624 &ptr_incr, false, bump);
10625 }
10626 else if (!costing_p)
10627 {
10628 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10629 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10630 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10631 gsi, stmt_info, bump);
10632 }
10633
10634 if (mask && !costing_p)
10635 vec_mask = vec_masks[j];
10636
10637 gimple *new_stmt = NULL;
10638 for (i = 0; i < vec_num; i++)
10639 {
10640 tree final_mask = NULL_TREE;
10641 tree final_len = NULL_TREE;
10642 tree bias = NULL_TREE;
10643 if (!costing_p)
10644 {
10645 if (loop_masks)
10646 final_mask
10647 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10648 vec_num * ncopies, vectype,
10649 vec_num * j + i);
10650 if (vec_mask)
10651 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10652 final_mask, vec_mask, gsi);
10653
10654 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10655 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10656 gsi, stmt_info, bump);
10657 }
10658
10659 /* 2. Create the vector-load in the loop. */
10660 unsigned HOST_WIDE_INT align;
10661 if (gs_info.ifn != IFN_LAST)
10662 {
10663 if (costing_p)
10664 {
10665 unsigned int cnunits = vect_nunits_for_cost (vectype);
10666 inside_cost
10667 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10668 stmt_info, 0, vect_body);
10669 continue;
10670 }
10671 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10672 vec_offset = vec_offsets[vec_num * j + i];
10673 tree zero = build_zero_cst (vectype);
10674 tree scale = size_int (gs_info.scale);
10675
10676 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10677 {
10678 if (loop_lens)
10679 final_len
10680 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10681 vec_num * ncopies, vectype,
10682 vec_num * j + i, 1);
10683 else
10684 final_len
10685 = build_int_cst (sizetype,
10686 TYPE_VECTOR_SUBPARTS (vectype));
10687 signed char biasval
10688 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10689 bias = build_int_cst (intQI_type_node, biasval);
10690 if (!final_mask)
10691 {
10692 mask_vectype = truth_type_for (vectype);
10693 final_mask = build_minus_one_cst (mask_vectype);
10694 }
10695 }
10696
10697 gcall *call;
10698 if (final_len && final_mask)
10699 call
10700 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10701 dataref_ptr, vec_offset,
10702 scale, zero, final_mask,
10703 final_len, bias);
10704 else if (final_mask)
10705 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10706 dataref_ptr, vec_offset,
10707 scale, zero, final_mask);
10708 else
10709 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10710 dataref_ptr, vec_offset,
10711 scale, zero);
10712 gimple_call_set_nothrow (call, true);
10713 new_stmt = call;
10714 data_ref = NULL_TREE;
10715 }
10716 else
10717 {
10718 /* Emulated gather-scatter. */
10719 gcc_assert (!final_mask);
10720 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
10721 if (costing_p)
10722 {
10723 /* For emulated gathers N offset vector element extracts (we assume
10724 the scalar scaling and ptr + offset add is consumed by the load). */
10725 inside_cost = record_stmt_cost (cost_vec, const_nunits,
10726 vec_to_scalar, stmt_info,
10727 0, vect_body);
10728 /* N scalar loads plus gathering them into a
10729 vector. */
10730 inside_cost
10731 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
10732 stmt_info, 0, vect_body);
10733 inside_cost
10734 = record_stmt_cost (cost_vec, 1, vec_construct,
10735 stmt_info, 0, vect_body);
10736 continue;
10737 }
10738 unsigned HOST_WIDE_INT const_offset_nunits
10739 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10740 .to_constant ();
10741 vec<constructor_elt, va_gc> *ctor_elts;
10742 vec_alloc (ctor_elts, const_nunits);
10743 gimple_seq stmts = NULL;
10744 /* We support offset vectors with more elements
10745 than the data vector for now. */
10746 unsigned HOST_WIDE_INT factor
10747 = const_offset_nunits / const_nunits;
10748 vec_offset = vec_offsets[j / factor];
10749 unsigned elt_offset = (j % factor) * const_nunits;
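/* For illustration only (hypothetical numbers): with a V8SI offset
   vector and a V4SI data vector, factor == 2, so copies j == 0 and
   j == 1 both read vec_offsets[0] and use elt_offset 0 and 4
   respectively to pick their four offset elements.  */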
10750 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10751 tree scale = size_int (gs_info.scale);
10752 align = get_object_alignment (DR_REF (first_dr_info->dr));
10753 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
10754 for (unsigned k = 0; k < const_nunits; ++k)
10755 {
10756 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
10757 bitsize_int (k + elt_offset));
10758 tree idx
10759 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
10760 vec_offset, TYPE_SIZE (idx_type), boff);
10761 idx = gimple_convert (&stmts, sizetype, idx);
10762 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
10763 scale);
10764 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10765 TREE_TYPE (dataref_ptr),
10766 dataref_ptr, idx);
10767 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10768 tree elt = make_ssa_name (TREE_TYPE (vectype));
10769 tree ref = build2 (MEM_REF, ltype, ptr,
10770 build_int_cst (ref_type, 0));
10771 new_stmt = gimple_build_assign (elt, ref);
10772 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
10773 gimple_seq_add_stmt (&stmts, new_stmt);
10774 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10775 }
10776 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10777 new_stmt = gimple_build_assign (
10778 NULL_TREE, build_constructor (vectype, ctor_elts));
10779 data_ref = NULL_TREE;
10780 }
10781
10782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10783 /* DATA_REF is null if we've already built the statement. */
10784 if (data_ref)
10785 {
10786 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10787 new_stmt = gimple_build_assign (vec_dest, data_ref);
10788 }
10789 new_temp = make_ssa_name (vec_dest, new_stmt);
10790 gimple_set_lhs (new_stmt, new_temp);
10791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10792
10793 /* Store vector loads in the corresponding SLP_NODE. */
10794 if (slp)
10795 slp_node->push_vec_def (new_stmt);
10796 }
10797
10798 if (!slp && !costing_p)
10799 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10800 }
10801
10802 if (!slp && !costing_p)
10803 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10804
10805 if (costing_p && dump_enabled_p ())
10806 dump_printf_loc (MSG_NOTE, vect_location,
10807 "vect_model_load_cost: inside_cost = %u, "
10808 "prologue_cost = %u .\n",
10809 inside_cost, prologue_cost);
10810 return true;
10811 }
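/* For illustration, a GIMPLE sketch of the internal-fn gather emitted
   above (names hypothetical):
     vect_x = .MASK_GATHER_LOAD (dataref_ptr, vec_offset, scale,
                                 zero, final_mask);
   whereas the emulated fallback loads one scalar MEM_REF per lane and
   collects the lanes into a CONSTRUCTOR of vectype.  */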
10812
10813 poly_uint64 group_elt = 0;
10814 unsigned int inside_cost = 0, prologue_cost = 0;
10815 for (j = 0; j < ncopies; j++)
10816 {
10817 /* 1. Create the vector or array pointer update chain. */
10818 if (j == 0 && !costing_p)
10819 {
10820 bool simd_lane_access_p
10821 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
10822 if (simd_lane_access_p
10823 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
10824 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
10825 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
10826 && integer_zerop (DR_INIT (first_dr_info->dr))
10827 && alias_sets_conflict_p (get_alias_set (aggr_type),
10828 get_alias_set (TREE_TYPE (ref_type)))
10829 && (alignment_support_scheme == dr_aligned
10830 || alignment_support_scheme == dr_unaligned_supported))
10831 {
10832 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
10833 dataref_offset = build_int_cst (ref_type, 0);
10834 }
10835 else if (diff_first_stmt_info)
10836 {
10837 dataref_ptr
10838 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
10839 aggr_type, at_loop, offset, &dummy,
10840 gsi, &ptr_incr, simd_lane_access_p,
10841 bump);
10842 /* Adjust the pointer by the difference to first_stmt. */
10843 data_reference_p ptrdr
10844 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
10845 tree diff
10846 = fold_convert (sizetype,
10847 size_binop (MINUS_EXPR,
10848 DR_INIT (first_dr_info->dr),
10849 DR_INIT (ptrdr)));
10850 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10851 stmt_info, diff);
10852 if (alignment_support_scheme == dr_explicit_realign)
10853 {
10854 msq = vect_setup_realignment (vinfo,
10855 first_stmt_info_for_drptr, gsi,
10856 &realignment_token,
10857 alignment_support_scheme,
10858 dataref_ptr, &at_loop);
10859 gcc_assert (!compute_in_loop);
10860 }
10861 }
10862 else
10863 dataref_ptr
10864 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10865 at_loop,
10866 offset, &dummy, gsi, &ptr_incr,
10867 simd_lane_access_p, bump);
10868 if (mask)
10869 vec_mask = vec_masks[0];
10870 }
10871 else if (!costing_p)
10872 {
10873 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10874 if (dataref_offset)
10875 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
10876 bump);
10877 else
10878 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10879 stmt_info, bump);
10880 if (mask)
10881 vec_mask = vec_masks[j];
10882 }
10883
10884 if (grouped_load || slp_perm)
10885 dr_chain.create (vec_num);
10886
10887 gimple *new_stmt = NULL;
10888 for (i = 0; i < vec_num; i++)
10889 {
10890 tree final_mask = NULL_TREE;
10891 tree final_len = NULL_TREE;
10892 tree bias = NULL_TREE;
10893 if (!costing_p)
10894 {
10895 if (loop_masks)
10896 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10897 vec_num * ncopies, vectype,
10898 vec_num * j + i);
10899 if (vec_mask)
10900 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10901 final_mask, vec_mask, gsi);
10902
10903 if (i > 0)
10904 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10905 gsi, stmt_info, bump);
10906 }
10907
10908 /* 2. Create the vector-load in the loop. */
10909 switch (alignment_support_scheme)
10910 {
10911 case dr_aligned:
10912 case dr_unaligned_supported:
10913 {
10914 if (costing_p)
10915 break;
10916
10917 unsigned int misalign;
10918 unsigned HOST_WIDE_INT align;
10919 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10920 if (alignment_support_scheme == dr_aligned)
10921 misalign = 0;
10922 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10923 {
10924 align
10925 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
10926 misalign = 0;
10927 }
10928 else
10929 misalign = misalignment;
10930 if (dataref_offset == NULL_TREE
10931 && TREE_CODE (dataref_ptr) == SSA_NAME)
10932 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
10933 misalign);
10934 align = least_bit_hwi (misalign | align);
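/* For illustration only (hypothetical numbers): with a 16-byte
   DR_TARGET_ALIGNMENT and misalign == 4, least_bit_hwi (4 | 16) == 4,
   so the memory reference below is emitted with 4-byte alignment.  */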
10935
10936 /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
10937 machine_mode vmode = TYPE_MODE (vectype);
10938 machine_mode new_vmode = vmode;
10939 internal_fn partial_ifn = IFN_LAST;
10940 if (loop_lens)
10941 {
10942 opt_machine_mode new_ovmode
10943 = get_len_load_store_mode (vmode, true, &partial_ifn);
10944 new_vmode = new_ovmode.require ();
10945 unsigned factor
10946 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
10947 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10948 vec_num * ncopies, vectype,
10949 vec_num * j + i, factor);
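/* For illustration: when the target only provides len_load on a
   VnQI view, new_vmode differs from vmode and factor becomes
   GET_MODE_UNIT_SIZE (vmode), e.g. 4 for V4SI, so the length above
   is counted in bytes rather than in vector elements.  */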
10950 }
10951 else if (final_mask)
10952 {
10953 if (!can_vec_mask_load_store_p (
10954 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
10955 &partial_ifn))
10956 gcc_unreachable ();
10957 }
10958
10959 if (partial_ifn == IFN_MASK_LEN_LOAD)
10960 {
10961 if (!final_len)
10962 {
10963 /* Pass VF value to 'len' argument of
10964 MASK_LEN_LOAD if LOOP_LENS is invalid. */
10965 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10966 }
10967 if (!final_mask)
10968 {
10969 /* Pass all ones value to 'mask' argument of
10970 MASK_LEN_LOAD if final_mask is invalid. */
10971 mask_vectype = truth_type_for (vectype);
10972 final_mask = build_minus_one_cst (mask_vectype);
10973 }
10974 }
10975 if (final_len)
10976 {
10977 signed char biasval
10978 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10979
10980 bias = build_int_cst (intQI_type_node, biasval);
10981 }
10982
10983 if (final_len)
10984 {
10985 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
10986 gcall *call;
10987 if (partial_ifn == IFN_MASK_LEN_LOAD)
10988 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
10989 dataref_ptr, ptr,
10990 final_mask, final_len,
10991 bias);
10992 else
10993 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10994 dataref_ptr, ptr,
10995 final_len, bias);
10996 gimple_call_set_nothrow (call, true);
10997 new_stmt = call;
10998 data_ref = NULL_TREE;
10999
11000 /* Need conversion if it's wrapped with VnQI. */
11001 if (vmode != new_vmode)
11002 {
11003 tree new_vtype = build_vector_type_for_mode (
11004 unsigned_intQI_type_node, new_vmode);
11005 tree var
11006 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11007 gimple_set_lhs (call, var);
11008 vect_finish_stmt_generation (vinfo, stmt_info, call,
11009 gsi);
11010 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11011 new_stmt = gimple_build_assign (vec_dest,
11012 VIEW_CONVERT_EXPR, op);
11013 }
11014 }
11015 else if (final_mask)
11016 {
11017 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11018 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11019 dataref_ptr, ptr,
11020 final_mask);
11021 gimple_call_set_nothrow (call, true);
11022 new_stmt = call;
11023 data_ref = NULL_TREE;
11024 }
11025 else
11026 {
11027 tree ltype = vectype;
11028 tree new_vtype = NULL_TREE;
11029 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11030 unsigned int vect_align
11031 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11032 unsigned int scalar_dr_size
11033 = vect_get_scalar_dr_size (first_dr_info);
11034 /* If there's no peeling for gaps but we have a gap
11035 with SLP loads, then load only the lower half of the
11036 vector. See get_group_load_store_type for when we
11037 apply this optimization. */
11038 if (slp
11039 && loop_vinfo
11040 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11041 && known_eq (nunits, (group_size - gap) * 2)
11042 && known_eq (nunits, group_size)
11043 && gap >= (vect_align / scalar_dr_size))
11044 {
11045 tree half_vtype;
11046 new_vtype
11047 = vector_vector_composition_type (vectype, 2,
11048 &half_vtype);
11049 if (new_vtype != NULL_TREE)
11050 ltype = half_vtype;
11051 }
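/* For illustration only (hypothetical numbers): with group_size == 4,
   gap == 2 and nunits == 4 (and the alignment condition holding), the
   check above fires and ltype becomes the half vector, so only the
   two live elements are loaded; the CONSTRUCTOR below fills the other
   half with zeros before view-converting back to vectype.  */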
11052 tree offset
11053 = (dataref_offset ? dataref_offset
11054 : build_int_cst (ref_type, 0));
11055 if (ltype != vectype
11056 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11057 {
11058 unsigned HOST_WIDE_INT gap_offset
11059 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11060 tree gapcst = build_int_cst (ref_type, gap_offset);
11061 offset = size_binop (PLUS_EXPR, offset, gapcst);
11062 }
11063 data_ref
11064 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11065 if (alignment_support_scheme == dr_aligned)
11066 ;
11067 else
11068 TREE_TYPE (data_ref)
11069 = build_aligned_type (TREE_TYPE (data_ref),
11070 align * BITS_PER_UNIT);
11071 if (ltype != vectype)
11072 {
11073 vect_copy_ref_info (data_ref,
11074 DR_REF (first_dr_info->dr));
11075 tree tem = make_ssa_name (ltype);
11076 new_stmt = gimple_build_assign (tem, data_ref);
11077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11078 gsi);
11079 data_ref = NULL;
11080 vec<constructor_elt, va_gc> *v;
11081 vec_alloc (v, 2);
11082 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11083 {
11084 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11085 build_zero_cst (ltype));
11086 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11087 }
11088 else
11089 {
11090 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11091 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11092 build_zero_cst (ltype));
11093 }
11094 gcc_assert (new_vtype != NULL_TREE);
11095 if (new_vtype == vectype)
11096 new_stmt = gimple_build_assign (
11097 vec_dest, build_constructor (vectype, v));
11098 else
11099 {
11100 tree new_vname = make_ssa_name (new_vtype);
11101 new_stmt = gimple_build_assign (
11102 new_vname, build_constructor (new_vtype, v));
11103 vect_finish_stmt_generation (vinfo, stmt_info,
11104 new_stmt, gsi);
11105 new_stmt = gimple_build_assign (
11106 vec_dest,
11107 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11108 }
11109 }
11110 }
11111 break;
11112 }
11113 case dr_explicit_realign:
11114 {
11115 if (costing_p)
11116 break;
11117 tree ptr, bump;
11118
11119 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11120
11121 if (compute_in_loop)
11122 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11123 &realignment_token,
11124 dr_explicit_realign,
11125 dataref_ptr, NULL);
11126
11127 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11128 ptr = copy_ssa_name (dataref_ptr);
11129 else
11130 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11131 // For explicit realign the target alignment should be
11132 // known at compile time.
11133 unsigned HOST_WIDE_INT align
11134 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11135 new_stmt = gimple_build_assign (
11136 ptr, BIT_AND_EXPR, dataref_ptr,
11137 build_int_cst (TREE_TYPE (dataref_ptr),
11138 -(HOST_WIDE_INT) align));
11139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11140 data_ref
11141 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11142 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11143 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11144 new_stmt = gimple_build_assign (vec_dest, data_ref);
11145 new_temp = make_ssa_name (vec_dest, new_stmt);
11146 gimple_assign_set_lhs (new_stmt, new_temp);
11147 gimple_move_vops (new_stmt, stmt_info->stmt);
11148 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11149 msq = new_temp;
11150
11151 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11152 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11153 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11154 bump);
11155 new_stmt = gimple_build_assign (
11156 NULL_TREE, BIT_AND_EXPR, ptr,
11157 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11158 if (TREE_CODE (ptr) == SSA_NAME)
11159 ptr = copy_ssa_name (ptr, new_stmt);
11160 else
11161 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11162 gimple_assign_set_lhs (new_stmt, ptr);
11163 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11164 data_ref
11165 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11166 break;
11167 }
11168 case dr_explicit_realign_optimized:
11169 {
11170 if (costing_p)
11171 break;
11172 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11173 new_temp = copy_ssa_name (dataref_ptr);
11174 else
11175 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11176 // We should only be doing this if we know the target
11177 // alignment at compile time.
11178 unsigned HOST_WIDE_INT align
11179 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11180 new_stmt = gimple_build_assign (
11181 new_temp, BIT_AND_EXPR, dataref_ptr,
11182 build_int_cst (TREE_TYPE (dataref_ptr),
11183 -(HOST_WIDE_INT) align));
11184 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11185 data_ref = build2 (MEM_REF, vectype, new_temp,
11186 build_int_cst (ref_type, 0));
11187 break;
11188 }
11189 default:
11190 gcc_unreachable ();
11191 }
11192
11193 /* One common place to cost the above vect load for different
11194 alignment support schemes. */
11195 if (costing_p)
11196 {
11197 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only
11198 need to take care of the first stmt, whose stmt_info is
11199 first_stmt_info; iterating vec_num times on it covers the
11200 cost of the remaining stmts, consistent with the transform.
11201 The prologue cost for realignment only needs to be counted
11202 once for the whole group. */
11203 bool first_stmt_info_p = first_stmt_info == stmt_info;
11204 bool add_realign_cost = first_stmt_info_p && i == 0;
11205 if (memory_access_type == VMAT_CONTIGUOUS
11206 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11207 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11208 && (!grouped_load || first_stmt_info_p)))
11209 vect_get_load_cost (vinfo, stmt_info, 1,
11210 alignment_support_scheme, misalignment,
11211 add_realign_cost, &inside_cost,
11212 &prologue_cost, cost_vec, cost_vec, true);
11213 }
11214 else
11215 {
11216 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11217 /* DATA_REF is null if we've already built the statement. */
11218 if (data_ref)
11219 {
11220 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11221 new_stmt = gimple_build_assign (vec_dest, data_ref);
11222 }
11223 new_temp = make_ssa_name (vec_dest, new_stmt);
11224 gimple_set_lhs (new_stmt, new_temp);
11225 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11226 }
11227
11228 /* 3. Handle explicit realignment if necessary/supported.
11229 Create in loop:
11230 vec_dest = realign_load (msq, lsq, realignment_token) */
11231 if (!costing_p
11232 && (alignment_support_scheme == dr_explicit_realign_optimized
11233 || alignment_support_scheme == dr_explicit_realign))
11234 {
11235 lsq = gimple_assign_lhs (new_stmt);
11236 if (!realignment_token)
11237 realignment_token = dataref_ptr;
11238 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11239 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11240 lsq, realignment_token);
11241 new_temp = make_ssa_name (vec_dest, new_stmt);
11242 gimple_assign_set_lhs (new_stmt, new_temp);
11243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11244
11245 if (alignment_support_scheme == dr_explicit_realign_optimized)
11246 {
11247 gcc_assert (phi);
11248 if (i == vec_num - 1 && j == ncopies - 1)
11249 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11250 UNKNOWN_LOCATION);
11251 msq = lsq;
11252 }
11253 }
11254
11255 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11256 {
11257 if (costing_p)
11258 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11259 stmt_info, 0, vect_body);
11260 else
11261 {
11262 tree perm_mask = perm_mask_for_reverse (vectype);
11263 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11264 perm_mask, stmt_info, gsi);
11265 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11266 }
11267 }
11268
11269 /* Collect vector loads and later create their permutation in
11270 vect_transform_grouped_load (). */
11271 if (!costing_p && (grouped_load || slp_perm))
11272 dr_chain.quick_push (new_temp);
11273
11274 /* Store vector loads in the corresponding SLP_NODE. */
11275 if (!costing_p && slp && !slp_perm)
11276 slp_node->push_vec_def (new_stmt);
11277
11278 /* With an SLP permutation we load the gaps as well; without
11279 one we need to skip the gaps after we manage to fully load
11280 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11281 group_elt += nunits;
11282 if (!costing_p
11283 && maybe_ne (group_gap_adj, 0U)
11284 && !slp_perm
11285 && known_eq (group_elt, group_size - group_gap_adj))
11286 {
11287 poly_wide_int bump_val
11288 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11289 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11290 == -1)
11291 bump_val = -bump_val;
11292 tree bump = wide_int_to_tree (sizetype, bump_val);
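/* For illustration only (hypothetical numbers): with a 4-byte
   elem_type and group_gap_adj == 1 the pointer is bumped by 4 bytes
   here (or by -4 for a negative step), skipping the unused group
   member.  */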
11293 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11294 stmt_info, bump);
11295 group_elt = 0;
11296 }
11297 }
11298 /* Bump the vector pointer to account for a gap or for excess
11299 elements loaded for a permuted SLP load. */
11300 if (!costing_p
11301 && maybe_ne (group_gap_adj, 0U)
11302 && slp_perm)
11303 {
11304 poly_wide_int bump_val
11305 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11306 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11307 bump_val = -bump_val;
11308 tree bump = wide_int_to_tree (sizetype, bump_val);
11309 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11310 stmt_info, bump);
11311 }
11312
11313 if (slp && !slp_perm)
11314 continue;
11315
11316 if (slp_perm)
11317 {
11318 unsigned n_perms;
11319 /* For SLP we know we've seen all possible uses of dr_chain so
11320 direct vect_transform_slp_perm_load to DCE the unused parts.
11321 ??? This is a hack to prevent compile-time issues as seen
11322 in PR101120 and friends. */
11323 if (costing_p)
11324 {
11325 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11326 true, &n_perms, nullptr);
11327 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11328 stmt_info, 0, vect_body);
11329 }
11330 else
11331 {
11332 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11333 gsi, vf, false, &n_perms,
11334 nullptr, true);
11335 gcc_assert (ok);
11336 }
11337 }
11338 else
11339 {
11340 if (grouped_load)
11341 {
11342 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11343 /* We assume that the cost of a single load-lanes instruction
11344 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11345 If a grouped access is instead being provided by a
11346 load-and-permute operation, include the cost of the
11347 permutes. */
11348 if (costing_p && first_stmt_info == stmt_info)
11349 {
11350 /* Uses even and odd extract operations or shuffle
11351 operations for each needed permute. */
11352 int group_size = DR_GROUP_SIZE (first_stmt_info);
11353 int nstmts = ceil_log2 (group_size) * group_size;
11354 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11355 stmt_info, 0, vect_body);
11356
11357 if (dump_enabled_p ())
11358 dump_printf_loc (MSG_NOTE, vect_location,
11359 "vect_model_load_cost:"
11360 "strided group_size = %d .\n",
11361 group_size);
11362 }
11363 else if (!costing_p)
11364 {
11365 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11366 group_size, gsi);
11367 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11368 }
11369 }
11370 else if (!costing_p)
11371 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11372 }
11373 dr_chain.release ();
11374 }
11375 if (!slp && !costing_p)
11376 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11377
11378 if (costing_p)
11379 {
11380 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11381 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11382 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11383 if (dump_enabled_p ())
11384 dump_printf_loc (MSG_NOTE, vect_location,
11385 "vect_model_load_cost: inside_cost = %u, "
11386 "prologue_cost = %u .\n",
11387 inside_cost, prologue_cost);
11388 }
11389
11390 return true;
11391 }
11392
11393 /* Function vect_is_simple_cond.
11394
11395 Input:
11396 LOOP - the loop that is being vectorized.
11397 COND - Condition that is checked for simple use.
11398
11399 Output:
11400 *COMP_VECTYPE - the vector type for the comparison.
11401 *DTS - The def types for the arguments of the comparison
11402
11403 Returns whether a COND can be vectorized. Checks whether
11404 condition operands are supportable using vect_is_simple_use. */
11405
11406 static bool
11407 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11408 slp_tree slp_node, tree *comp_vectype,
11409 enum vect_def_type *dts, tree vectype)
11410 {
11411 tree lhs, rhs;
11412 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11413 slp_tree slp_op;
11414
11415 /* Mask case. */
11416 if (TREE_CODE (cond) == SSA_NAME
11417 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11418 {
11419 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11420 &slp_op, &dts[0], comp_vectype)
11421 || !*comp_vectype
11422 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11423 return false;
11424 return true;
11425 }
11426
11427 if (!COMPARISON_CLASS_P (cond))
11428 return false;
11429
11430 lhs = TREE_OPERAND (cond, 0);
11431 rhs = TREE_OPERAND (cond, 1);
11432
11433 if (TREE_CODE (lhs) == SSA_NAME)
11434 {
11435 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11436 &lhs, &slp_op, &dts[0], &vectype1))
11437 return false;
11438 }
11439 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11440 || TREE_CODE (lhs) == FIXED_CST)
11441 dts[0] = vect_constant_def;
11442 else
11443 return false;
11444
11445 if (TREE_CODE (rhs) == SSA_NAME)
11446 {
11447 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11448 &rhs, &slp_op, &dts[1], &vectype2))
11449 return false;
11450 }
11451 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11452 || TREE_CODE (rhs) == FIXED_CST)
11453 dts[1] = vect_constant_def;
11454 else
11455 return false;
11456
11457 if (vectype1 && vectype2
11458 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11459 TYPE_VECTOR_SUBPARTS (vectype2)))
11460 return false;
11461
11462 *comp_vectype = vectype1 ? vectype1 : vectype2;
11463 /* Invariant comparison. */
11464 if (! *comp_vectype)
11465 {
11466 tree scalar_type = TREE_TYPE (lhs);
11467 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11468 *comp_vectype = truth_type_for (vectype);
11469 else
11470 {
11471 /* If we can widen the comparison to match vectype do so. */
11472 if (INTEGRAL_TYPE_P (scalar_type)
11473 && !slp_node
11474 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11475 TYPE_SIZE (TREE_TYPE (vectype))))
11476 scalar_type = build_nonstandard_integer_type
11477 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11478 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11479 slp_node);
11480 }
11481 }
11482
11483 return true;
11484 }
11485
11486 /* vectorizable_condition.
11487
11488 Check if STMT_INFO is conditional modify expression that can be vectorized.
11489 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11490 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11491 at GSI.
11492
11493 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11494
11495 Return true if STMT_INFO is vectorizable in this way. */
11496
11497 static bool
11498 vectorizable_condition (vec_info *vinfo,
11499 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11500 gimple **vec_stmt,
11501 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11502 {
11503 tree scalar_dest = NULL_TREE;
11504 tree vec_dest = NULL_TREE;
11505 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11506 tree then_clause, else_clause;
11507 tree comp_vectype = NULL_TREE;
11508 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11509 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11510 tree vec_compare;
11511 tree new_temp;
11512 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11513 enum vect_def_type dts[4]
11514 = {vect_unknown_def_type, vect_unknown_def_type,
11515 vect_unknown_def_type, vect_unknown_def_type};
11516 int ndts = 4;
11517 int ncopies;
11518 int vec_num;
11519 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11520 int i;
11521 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11522 vec<tree> vec_oprnds0 = vNULL;
11523 vec<tree> vec_oprnds1 = vNULL;
11524 vec<tree> vec_oprnds2 = vNULL;
11525 vec<tree> vec_oprnds3 = vNULL;
11526 tree vec_cmp_type;
11527 bool masked = false;
11528
11529 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11530 return false;
11531
11532 /* Is this a vectorizable conditional operation? */
11533 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11534 if (!stmt)
11535 return false;
11536
11537 code = gimple_assign_rhs_code (stmt);
11538 if (code != COND_EXPR)
11539 return false;
11540
11541 stmt_vec_info reduc_info = NULL;
11542 int reduc_index = -1;
11543 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11544 bool for_reduction
11545 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11546 if (for_reduction)
11547 {
11548 if (slp_node)
11549 return false;
11550 reduc_info = info_for_reduction (vinfo, stmt_info);
11551 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11552 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11553 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11554 || reduc_index != -1);
11555 }
11556 else
11557 {
11558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11559 return false;
11560 }
11561
11562 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11563 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11564
11565 if (slp_node)
11566 {
11567 ncopies = 1;
11568 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11569 }
11570 else
11571 {
11572 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11573 vec_num = 1;
11574 }
11575
11576 gcc_assert (ncopies >= 1);
11577 if (for_reduction && ncopies > 1)
11578 return false; /* FORNOW */
11579
11580 cond_expr = gimple_assign_rhs1 (stmt);
11581
11582 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11583 &comp_vectype, &dts[0], vectype)
11584 || !comp_vectype)
11585 return false;
11586
11587 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11588 slp_tree then_slp_node, else_slp_node;
11589 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11590 &then_clause, &then_slp_node, &dts[2], &vectype1))
11591 return false;
11592 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11593 &else_clause, &else_slp_node, &dts[3], &vectype2))
11594 return false;
11595
11596 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11597 return false;
11598
11599 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11600 return false;
11601
11602 masked = !COMPARISON_CLASS_P (cond_expr);
11603 vec_cmp_type = truth_type_for (comp_vectype);
11604
11605 if (vec_cmp_type == NULL_TREE)
11606 return false;
11607
11608 cond_code = TREE_CODE (cond_expr);
11609 if (!masked)
11610 {
11611 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11612 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11613 }
11614
11615 /* For conditional reductions, the "then" value needs to be the candidate
11616 value calculated by this iteration while the "else" value needs to be
11617 the result carried over from previous iterations. If the COND_EXPR
11618 is the other way around, we need to swap it. */
11619 bool must_invert_cmp_result = false;
11620 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11621 {
11622 if (masked)
11623 must_invert_cmp_result = true;
11624 else
11625 {
11626 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11627 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11628 if (new_code == ERROR_MARK)
11629 must_invert_cmp_result = true;
11630 else
11631 {
11632 cond_code = new_code;
11633 /* Make sure we don't accidentally use the old condition. */
11634 cond_expr = NULL_TREE;
11635 }
11636 }
11637 std::swap (then_clause, else_clause);
11638 }
11639
11640 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
11641 {
11642 /* Boolean values may have another representation in vectors
11643 and therefore we prefer bit operations over comparison for
11644 them (which also works for scalar masks). We store opcodes
11645 to use in bitop1 and bitop2. The statement is vectorized as
11646 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
11647 depending on bitop1 and bitop2 arity. */
11648 switch (cond_code)
11649 {
11650 case GT_EXPR:
11651 bitop1 = BIT_NOT_EXPR;
11652 bitop2 = BIT_AND_EXPR;
11653 break;
11654 case GE_EXPR:
11655 bitop1 = BIT_NOT_EXPR;
11656 bitop2 = BIT_IOR_EXPR;
11657 break;
11658 case LT_EXPR:
11659 bitop1 = BIT_NOT_EXPR;
11660 bitop2 = BIT_AND_EXPR;
11661 std::swap (cond_expr0, cond_expr1);
11662 break;
11663 case LE_EXPR:
11664 bitop1 = BIT_NOT_EXPR;
11665 bitop2 = BIT_IOR_EXPR;
11666 std::swap (cond_expr0, cond_expr1);
11667 break;
11668 case NE_EXPR:
11669 bitop1 = BIT_XOR_EXPR;
11670 break;
11671 case EQ_EXPR:
11672 bitop1 = BIT_XOR_EXPR;
11673 bitop2 = BIT_NOT_EXPR;
11674 break;
11675 default:
11676 return false;
11677 }
11678 cond_code = SSA_NAME;
11679 }
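/* For illustration: on boolean masks the mapping above yields
   e.g. rhs1 & ~rhs2 for GT, rhs1 | ~rhs2 for GE and rhs1 ^ rhs2 for
   NE, which matches the scalar semantics for the values 0 and 1.  */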
11680
11681 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
11682 && reduction_type == EXTRACT_LAST_REDUCTION
11683 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
11684 {
11685 if (dump_enabled_p ())
11686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11687 "reduction comparison operation not supported.\n");
11688 return false;
11689 }
11690
11691 if (!vec_stmt)
11692 {
11693 if (bitop1 != NOP_EXPR)
11694 {
11695 machine_mode mode = TYPE_MODE (comp_vectype);
11696 optab optab;
11697
11698 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
11699 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11700 return false;
11701
11702 if (bitop2 != NOP_EXPR)
11703 {
11704 optab = optab_for_tree_code (bitop2, comp_vectype,
11705 optab_default);
11706 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11707 return false;
11708 }
11709 }
11710
11711 vect_cost_for_stmt kind = vector_stmt;
11712 if (reduction_type == EXTRACT_LAST_REDUCTION)
11713 /* Count one reduction-like operation per vector. */
11714 kind = vec_to_scalar;
11715 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
11716 && (masked
11717 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
11718 cond_code)
11719 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
11720 ERROR_MARK))))
11721 return false;
11722
11723 if (slp_node
11724 && (!vect_maybe_update_slp_op_vectype
11725 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
11726 || (op_adjust == 1
11727 && !vect_maybe_update_slp_op_vectype
11728 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
11729 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
11730 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
11731 {
11732 if (dump_enabled_p ())
11733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11734 "incompatible vector types for invariants\n");
11735 return false;
11736 }
11737
11738 if (loop_vinfo && for_reduction
11739 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
11740 {
11741 if (reduction_type == EXTRACT_LAST_REDUCTION)
11742 {
11743 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
11744 vectype, OPTIMIZE_FOR_SPEED))
11745 vect_record_loop_len (loop_vinfo,
11746 &LOOP_VINFO_LENS (loop_vinfo),
11747 ncopies * vec_num, vectype, 1);
11748 else
11749 vect_record_loop_mask (loop_vinfo,
11750 &LOOP_VINFO_MASKS (loop_vinfo),
11751 ncopies * vec_num, vectype, NULL);
11752 }
11753 /* Extra inactive lanes should be safe for vect_nested_cycle. */
11754 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
11755 {
11756 if (dump_enabled_p ())
11757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11758 "conditional reduction prevents the use"
11759 " of partial vectors.\n");
11760 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
11761 }
11762 }
11763
11764 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
11765 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
11766 cost_vec, kind);
11767 return true;
11768 }
11769
11770 /* Transform. */
11771
11772 /* Handle def. */
11773 scalar_dest = gimple_assign_lhs (stmt);
11774 if (reduction_type != EXTRACT_LAST_REDUCTION)
11775 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11776
11777 bool swap_cond_operands = false;
11778
11779 /* See whether another part of the vectorized code applies a loop
11780 mask to the condition, or to its inverse. */
11781
11782 vec_loop_masks *masks = NULL;
11783 vec_loop_lens *lens = NULL;
11784 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
11785 {
11786 if (reduction_type == EXTRACT_LAST_REDUCTION)
11787 lens = &LOOP_VINFO_LENS (loop_vinfo);
11788 }
11789 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
11790 {
11791 if (reduction_type == EXTRACT_LAST_REDUCTION)
11792 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11793 else
11794 {
11795 scalar_cond_masked_key cond (cond_expr, ncopies);
11796 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11797 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11798 else
11799 {
11800 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
11801 tree_code orig_code = cond.code;
11802 cond.code = invert_tree_comparison (cond.code, honor_nans);
11803 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
11804 {
11805 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11806 cond_code = cond.code;
11807 swap_cond_operands = true;
11808 }
11809 else
11810 {
11811 /* Try the inverse of the current mask. We check if the
11812 inverse mask is live and if so we generate a negate of
11813 the current mask such that we still honor NaNs. */
11814 cond.inverted_p = true;
11815 cond.code = orig_code;
11816 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11817 {
11818 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11819 cond_code = cond.code;
11820 swap_cond_operands = true;
11821 must_invert_cmp_result = true;
11822 }
11823 }
11824 }
11825 }
11826 }
11827
11828 /* Handle cond expr. */
11829 if (masked)
11830 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11831 cond_expr, &vec_oprnds0, comp_vectype,
11832 then_clause, &vec_oprnds2, vectype,
11833 reduction_type != EXTRACT_LAST_REDUCTION
11834 ? else_clause : NULL, &vec_oprnds3, vectype);
11835 else
11836 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11837 cond_expr0, &vec_oprnds0, comp_vectype,
11838 cond_expr1, &vec_oprnds1, comp_vectype,
11839 then_clause, &vec_oprnds2, vectype,
11840 reduction_type != EXTRACT_LAST_REDUCTION
11841 ? else_clause : NULL, &vec_oprnds3, vectype);
11842
11843 /* Arguments are ready. Create the new vector stmt. */
11844 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
11845 {
11846 vec_then_clause = vec_oprnds2[i];
11847 if (reduction_type != EXTRACT_LAST_REDUCTION)
11848 vec_else_clause = vec_oprnds3[i];
11849
11850 if (swap_cond_operands)
11851 std::swap (vec_then_clause, vec_else_clause);
11852
11853 if (masked)
11854 vec_compare = vec_cond_lhs;
11855 else
11856 {
11857 vec_cond_rhs = vec_oprnds1[i];
11858 if (bitop1 == NOP_EXPR)
11859 {
11860 gimple_seq stmts = NULL;
11861 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
11862 vec_cond_lhs, vec_cond_rhs);
11863 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
11864 }
11865 else
11866 {
11867 new_temp = make_ssa_name (vec_cmp_type);
11868 gassign *new_stmt;
11869 if (bitop1 == BIT_NOT_EXPR)
11870 new_stmt = gimple_build_assign (new_temp, bitop1,
11871 vec_cond_rhs);
11872 else
11873 new_stmt
11874 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
11875 vec_cond_rhs);
11876 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11877 if (bitop2 == NOP_EXPR)
11878 vec_compare = new_temp;
11879 else if (bitop2 == BIT_NOT_EXPR
11880 && reduction_type != EXTRACT_LAST_REDUCTION)
11881 {
11882 /* Instead of doing ~x ? y : z do x ? z : y. */
11883 vec_compare = new_temp;
11884 std::swap (vec_then_clause, vec_else_clause);
11885 }
11886 else
11887 {
11888 vec_compare = make_ssa_name (vec_cmp_type);
11889 if (bitop2 == BIT_NOT_EXPR)
11890 new_stmt
11891 = gimple_build_assign (vec_compare, bitop2, new_temp);
11892 else
11893 new_stmt
11894 = gimple_build_assign (vec_compare, bitop2,
11895 vec_cond_lhs, new_temp);
11896 vect_finish_stmt_generation (vinfo, stmt_info,
11897 new_stmt, gsi);
11898 }
11899 }
11900 }
11901
11902 /* If we decided to apply a loop mask to the result of the vector
11903 comparison, AND the comparison with the mask now. Later passes
11904 should then be able to reuse the AND results between multiple
11905 vector statements.
11906
11907 For example:
11908 for (int i = 0; i < 100; ++i)
11909 x[i] = y[i] ? z[i] : 10;
11910
11911 results in following optimized GIMPLE:
11912
11913 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11914 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11915 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11916 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11917 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11918 vect_iftmp.11_47, { 10, ... }>;
11919
11920 instead of using masked and unmasked forms of
11921 vec != { 0, ... } (masked in the MASK_LOAD,
11922 unmasked in the VEC_COND_EXPR). */
11923
11924 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11925 in cases where that's necessary. */
11926
11927 tree len = NULL_TREE, bias = NULL_TREE;
11928 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
11929 {
11930 if (!is_gimple_val (vec_compare))
11931 {
11932 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11933 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11934 vec_compare);
11935 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11936 vec_compare = vec_compare_name;
11937 }
11938
11939 if (must_invert_cmp_result)
11940 {
11941 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11942 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11943 BIT_NOT_EXPR,
11944 vec_compare);
11945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11946 vec_compare = vec_compare_name;
11947 }
11948
11949 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
11950 vectype, OPTIMIZE_FOR_SPEED))
11951 {
11952 if (lens)
11953 {
11954 len = vect_get_loop_len (loop_vinfo, gsi, lens,
11955 vec_num * ncopies, vectype, i, 1);
11956 signed char biasval
11957 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11958 bias = build_int_cst (intQI_type_node, biasval);
11959 }
11960 else
11961 {
11962 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11963 bias = build_int_cst (intQI_type_node, 0);
11964 }
11965 }
11966 if (masks)
11967 {
11968 tree loop_mask
11969 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
11970 vectype, i);
11971 tree tmp2 = make_ssa_name (vec_cmp_type);
11972 gassign *g
11973 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11974 loop_mask);
11975 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11976 vec_compare = tmp2;
11977 }
11978 }
11979
11980 gimple *new_stmt;
11981 if (reduction_type == EXTRACT_LAST_REDUCTION)
11982 {
11983 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11984 tree lhs = gimple_get_lhs (old_stmt);
11985 if (len)
11986 new_stmt = gimple_build_call_internal
11987 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
11988 vec_then_clause, len, bias);
11989 else
11990 new_stmt = gimple_build_call_internal
11991 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11992 vec_then_clause);
11993 gimple_call_set_lhs (new_stmt, lhs);
11994 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11995 if (old_stmt == gsi_stmt (*gsi))
11996 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11997 else
11998 {
11999 /* In this case we're moving the definition to later in the
12000 block. That doesn't matter because the only uses of the
12001 lhs are in phi statements. */
12002 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12003 gsi_remove (&old_gsi, true);
12004 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12005 }
12006 }
12007 else
12008 {
12009 new_temp = make_ssa_name (vec_dest);
12010 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12011 vec_then_clause, vec_else_clause);
12012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12013 }
12014 if (slp_node)
12015 slp_node->push_vec_def (new_stmt);
12016 else
12017 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12018 }
12019
12020 if (!slp_node)
12021 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12022
12023 vec_oprnds0.release ();
12024 vec_oprnds1.release ();
12025 vec_oprnds2.release ();
12026 vec_oprnds3.release ();
12027
12028 return true;
12029 }
12030
12031 /* vectorizable_comparison.
12032
12033 Check if STMT_INFO is a comparison expression that can be vectorized.
12034 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12035 comparison, put it in VEC_STMT, and insert it at GSI.
12036
12037 Return true if STMT_INFO is vectorizable in this way. */
12038
12039 static bool
12040 vectorizable_comparison (vec_info *vinfo,
12041 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12042 gimple **vec_stmt,
12043 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12044 {
12045 tree lhs, rhs1, rhs2;
12046 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12047 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12048 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12049 tree new_temp;
12050 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12051 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12052 int ndts = 2;
12053 poly_uint64 nunits;
12054 int ncopies;
12055 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12056 int i;
12057 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12058 vec<tree> vec_oprnds0 = vNULL;
12059 vec<tree> vec_oprnds1 = vNULL;
12060 tree mask_type;
12061 tree mask;
12062
12063 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12064 return false;
12065
12066 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12067 return false;
12068
12069 mask_type = vectype;
12070 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12071
12072 if (slp_node)
12073 ncopies = 1;
12074 else
12075 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12076
12077 gcc_assert (ncopies >= 1);
12078 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12079 return false;
12080
12081 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12082 if (!stmt)
12083 return false;
12084
12085 code = gimple_assign_rhs_code (stmt);
12086
12087 if (TREE_CODE_CLASS (code) != tcc_comparison)
12088 return false;
12089
12090 slp_tree slp_rhs1, slp_rhs2;
12091 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12092 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12093 return false;
12094
12095 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12096 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12097 return false;
12098
12099 if (vectype1 && vectype2
12100 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12101 TYPE_VECTOR_SUBPARTS (vectype2)))
12102 return false;
12103
12104 vectype = vectype1 ? vectype1 : vectype2;
12105
12106 /* Invariant comparison. */
12107 if (!vectype)
12108 {
12109 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12110 vectype = mask_type;
12111 else
12112 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12113 slp_node);
12114 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12115 return false;
12116 }
12117 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12118 return false;
12119
12120 /* Can't compare mask and non-mask types. */
12121 if (vectype1 && vectype2
12122 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12123 return false;
12124
12125 /* Boolean values may have another representation in vectors
12126 and therefore we prefer bit operations over comparisons for
12127 them (which also works for scalar masks). We store the opcodes
12128 to use in bitop1 and bitop2. The statement is vectorized as
12129 BITOP2 (rhs1 BITOP1 rhs2) or
12130 rhs1 BITOP2 (BITOP1 rhs2)
12131 depending on bitop1 and bitop2 arity. */
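/* As an illustration, for vector booleans a > b is computed as a & ~b
   (bitop1 BIT_NOT_EXPR on rhs2 followed by bitop2 BIT_AND_EXPR),
   a >= b as a | ~b, a < b and a <= b likewise with the operands swapped,
   a == b as ~(a ^ b), and a != b as a ^ b.  */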
12132 bool swap_p = false;
12133 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12134 {
12135 if (code == GT_EXPR)
12136 {
12137 bitop1 = BIT_NOT_EXPR;
12138 bitop2 = BIT_AND_EXPR;
12139 }
12140 else if (code == GE_EXPR)
12141 {
12142 bitop1 = BIT_NOT_EXPR;
12143 bitop2 = BIT_IOR_EXPR;
12144 }
12145 else if (code == LT_EXPR)
12146 {
12147 bitop1 = BIT_NOT_EXPR;
12148 bitop2 = BIT_AND_EXPR;
12149 swap_p = true;
12150 }
12151 else if (code == LE_EXPR)
12152 {
12153 bitop1 = BIT_NOT_EXPR;
12154 bitop2 = BIT_IOR_EXPR;
12155 swap_p = true;
12156 }
12157 else
12158 {
12159 bitop1 = BIT_XOR_EXPR;
12160 if (code == EQ_EXPR)
12161 bitop2 = BIT_NOT_EXPR;
12162 }
12163 }
12164
12165 if (!vec_stmt)
12166 {
12167 if (bitop1 == NOP_EXPR)
12168 {
12169 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12170 return false;
12171 }
12172 else
12173 {
12174 machine_mode mode = TYPE_MODE (vectype);
12175 optab optab;
12176
12177 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12178 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12179 return false;
12180
12181 if (bitop2 != NOP_EXPR)
12182 {
12183 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12184 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12185 return false;
12186 }
12187 }
12188
12189 /* Put types on constant and invariant SLP children. */
12190 if (slp_node
12191 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12192 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12193 {
12194 if (dump_enabled_p ())
12195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12196 "incompatible vector types for invariants\n");
12197 return false;
12198 }
12199
12200 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12201 vect_model_simple_cost (vinfo, stmt_info,
12202 ncopies * (1 + (bitop2 != NOP_EXPR)),
12203 dts, ndts, slp_node, cost_vec);
12204 return true;
12205 }
12206
12207 /* Transform. */
12208
12209 /* Handle def. */
12210 lhs = gimple_assign_lhs (stmt);
12211 mask = vect_create_destination_var (lhs, mask_type);
12212
12213 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12214 rhs1, &vec_oprnds0, vectype,
12215 rhs2, &vec_oprnds1, vectype);
12216 if (swap_p)
12217 std::swap (vec_oprnds0, vec_oprnds1);
12218
12219 /* Arguments are ready. Create the new vector stmt. */
12220 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12221 {
12222 gimple *new_stmt;
12223 vec_rhs2 = vec_oprnds1[i];
12224
12225 new_temp = make_ssa_name (mask);
12226 if (bitop1 == NOP_EXPR)
12227 {
12228 new_stmt = gimple_build_assign (new_temp, code,
12229 vec_rhs1, vec_rhs2);
12230 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12231 }
12232 else
12233 {
12234 if (bitop1 == BIT_NOT_EXPR)
12235 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12236 else
12237 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12238 vec_rhs2);
12239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12240 if (bitop2 != NOP_EXPR)
12241 {
12242 tree res = make_ssa_name (mask);
12243 if (bitop2 == BIT_NOT_EXPR)
12244 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12245 else
12246 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12247 new_temp);
12248 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12249 }
12250 }
12251 if (slp_node)
12252 slp_node->push_vec_def (new_stmt);
12253 else
12254 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12255 }
12256
12257 if (!slp_node)
12258 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12259
12260 vec_oprnds0.release ();
12261 vec_oprnds1.release ();
12262
12263 return true;
12264 }
12265
12266 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12267 can handle all live statements in the node. Otherwise return true
12268 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12269 VEC_STMT_P is as for vectorizable_live_operation. */
12270
12271 static bool
12272 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12273 slp_tree slp_node, slp_instance slp_node_instance,
12274 bool vec_stmt_p,
12275 stmt_vector_for_cost *cost_vec)
12276 {
12277 if (slp_node)
12278 {
12279 stmt_vec_info slp_stmt_info;
12280 unsigned int i;
12281 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12282 {
12283 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12284 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12285 slp_node_instance, i,
12286 vec_stmt_p, cost_vec))
12287 return false;
12288 }
12289 }
12290 else if (STMT_VINFO_LIVE_P (stmt_info)
12291 && !vectorizable_live_operation (vinfo, stmt_info,
12292 slp_node, slp_node_instance, -1,
12293 vec_stmt_p, cost_vec))
12294 return false;
12295
12296 return true;
12297 }
12298
12299 /* Make sure the statement is vectorizable. */
12300
12301 opt_result
12302 vect_analyze_stmt (vec_info *vinfo,
12303 stmt_vec_info stmt_info, bool *need_to_vectorize,
12304 slp_tree node, slp_instance node_instance,
12305 stmt_vector_for_cost *cost_vec)
12306 {
12307 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12308 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12309 bool ok;
12310 gimple_seq pattern_def_seq;
12311
12312 if (dump_enabled_p ())
12313 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12314 stmt_info->stmt);
12315
12316 if (gimple_has_volatile_ops (stmt_info->stmt))
12317 return opt_result::failure_at (stmt_info->stmt,
12318 "not vectorized:"
12319 " stmt has volatile operands: %G\n",
12320 stmt_info->stmt);
12321
12322 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12323 && node == NULL
12324 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12325 {
12326 gimple_stmt_iterator si;
12327
12328 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12329 {
12330 stmt_vec_info pattern_def_stmt_info
12331 = vinfo->lookup_stmt (gsi_stmt (si));
12332 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12333 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12334 {
12335 /* Analyze def stmt of STMT if it's a pattern stmt. */
12336 if (dump_enabled_p ())
12337 dump_printf_loc (MSG_NOTE, vect_location,
12338 "==> examining pattern def statement: %G",
12339 pattern_def_stmt_info->stmt);
12340
12341 opt_result res
12342 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12343 need_to_vectorize, node, node_instance,
12344 cost_vec);
12345 if (!res)
12346 return res;
12347 }
12348 }
12349 }
12350
12351 /* Skip stmts that do not need to be vectorized. In loops this is expected
12352 to include:
12353 - the COND_EXPR which is the loop exit condition
12354 - any LABEL_EXPRs in the loop
12355 - computations that are used only for array indexing or loop control.
12356 In basic blocks we only analyze statements that are a part of some SLP
12357 instance, therefore, all the statements are relevant.
12358
12359 A pattern statement needs to be analyzed instead of the original
12360 statement if the original statement is not relevant. Otherwise, we
12361 analyze both statements. In basic blocks we are called from some SLP
12362 instance traversal, so don't analyze pattern stmts instead; the
12363 pattern stmts will already be part of an SLP instance. */
12364
12365 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12366 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12367 && !STMT_VINFO_LIVE_P (stmt_info))
12368 {
12369 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12370 && pattern_stmt_info
12371 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12372 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12373 {
12374 /* Analyze PATTERN_STMT instead of the original stmt. */
12375 stmt_info = pattern_stmt_info;
12376 if (dump_enabled_p ())
12377 dump_printf_loc (MSG_NOTE, vect_location,
12378 "==> examining pattern statement: %G",
12379 stmt_info->stmt);
12380 }
12381 else
12382 {
12383 if (dump_enabled_p ())
12384 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12385
12386 return opt_result::success ();
12387 }
12388 }
12389 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12390 && node == NULL
12391 && pattern_stmt_info
12392 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12393 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12394 {
12395 /* Analyze PATTERN_STMT too. */
12396 if (dump_enabled_p ())
12397 dump_printf_loc (MSG_NOTE, vect_location,
12398 "==> examining pattern statement: %G",
12399 pattern_stmt_info->stmt);
12400
12401 opt_result res
12402 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12403 node_instance, cost_vec);
12404 if (!res)
12405 return res;
12406 }
12407
12408 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12409 {
12410 case vect_internal_def:
12411 break;
12412
12413 case vect_reduction_def:
12414 case vect_nested_cycle:
12415 gcc_assert (!bb_vinfo
12416 && (relevance == vect_used_in_outer
12417 || relevance == vect_used_in_outer_by_reduction
12418 || relevance == vect_used_by_reduction
12419 || relevance == vect_unused_in_scope
12420 || relevance == vect_used_only_live));
12421 break;
12422
12423 case vect_induction_def:
12424 case vect_first_order_recurrence:
12425 gcc_assert (!bb_vinfo);
12426 break;
12427
12428 case vect_constant_def:
12429 case vect_external_def:
12430 case vect_unknown_def_type:
12431 default:
12432 gcc_unreachable ();
12433 }
12434
12435 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12436 if (node)
12437 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12438
12439 if (STMT_VINFO_RELEVANT_P (stmt_info))
12440 {
12441 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12442 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12443 || (call && gimple_call_lhs (call) == NULL_TREE));
12444 *need_to_vectorize = true;
12445 }
12446
12447 if (PURE_SLP_STMT (stmt_info) && !node)
12448 {
12449 if (dump_enabled_p ())
12450 dump_printf_loc (MSG_NOTE, vect_location,
12451 "handled only by SLP analysis\n");
12452 return opt_result::success ();
12453 }
12454
12455 ok = true;
12456 if (!bb_vinfo
12457 && (STMT_VINFO_RELEVANT_P (stmt_info)
12458 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12459 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12460 -mveclibabi= takes preference over library functions with
12461 the simd attribute. */
12462 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12463 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12464 cost_vec)
12465 || vectorizable_conversion (vinfo, stmt_info,
12466 NULL, NULL, node, cost_vec)
12467 || vectorizable_operation (vinfo, stmt_info,
12468 NULL, NULL, node, cost_vec)
12469 || vectorizable_assignment (vinfo, stmt_info,
12470 NULL, NULL, node, cost_vec)
12471 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12472 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12473 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12474 node, node_instance, cost_vec)
12475 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12476 NULL, node, cost_vec)
12477 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12478 || vectorizable_condition (vinfo, stmt_info,
12479 NULL, NULL, node, cost_vec)
12480 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12481 cost_vec)
12482 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12483 stmt_info, NULL, node)
12484 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12485 stmt_info, NULL, node, cost_vec));
12486 else
12487 {
12488 if (bb_vinfo)
12489 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12490 || vectorizable_simd_clone_call (vinfo, stmt_info,
12491 NULL, NULL, node, cost_vec)
12492 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12493 cost_vec)
12494 || vectorizable_shift (vinfo, stmt_info,
12495 NULL, NULL, node, cost_vec)
12496 || vectorizable_operation (vinfo, stmt_info,
12497 NULL, NULL, node, cost_vec)
12498 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12499 cost_vec)
12500 || vectorizable_load (vinfo, stmt_info,
12501 NULL, NULL, node, cost_vec)
12502 || vectorizable_store (vinfo, stmt_info,
12503 NULL, NULL, node, cost_vec)
12504 || vectorizable_condition (vinfo, stmt_info,
12505 NULL, NULL, node, cost_vec)
12506 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12507 cost_vec)
12508 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12509 }
12510
12511 if (node)
12512 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12513
12514 if (!ok)
12515 return opt_result::failure_at (stmt_info->stmt,
12516 "not vectorized:"
12517 " relevant stmt not supported: %G",
12518 stmt_info->stmt);
12519
12520 /* Stmts that are (also) "live" (i.e. that are used outside the loop)
12521 need extra handling, except for vectorizable reductions. */
12522 if (!bb_vinfo
12523 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12524 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12525 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12526 stmt_info, node, node_instance,
12527 false, cost_vec))
12528 return opt_result::failure_at (stmt_info->stmt,
12529 "not vectorized:"
12530 " live stmt not supported: %G",
12531 stmt_info->stmt);
12532
12533 return opt_result::success ();
12534 }
12535
12536
12537 /* Function vect_transform_stmt.
12538
12539 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12540
12541 bool
12542 vect_transform_stmt (vec_info *vinfo,
12543 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12544 slp_tree slp_node, slp_instance slp_node_instance)
12545 {
12546 bool is_store = false;
12547 gimple *vec_stmt = NULL;
12548 bool done;
12549
12550 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12551
12552 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12553 if (slp_node)
12554 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12555
12556 switch (STMT_VINFO_TYPE (stmt_info))
12557 {
12558 case type_demotion_vec_info_type:
12559 case type_promotion_vec_info_type:
12560 case type_conversion_vec_info_type:
12561 done = vectorizable_conversion (vinfo, stmt_info,
12562 gsi, &vec_stmt, slp_node, NULL);
12563 gcc_assert (done);
12564 break;
12565
12566 case induc_vec_info_type:
12567 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12568 stmt_info, &vec_stmt, slp_node,
12569 NULL);
12570 gcc_assert (done);
12571 break;
12572
12573 case shift_vec_info_type:
12574 done = vectorizable_shift (vinfo, stmt_info,
12575 gsi, &vec_stmt, slp_node, NULL);
12576 gcc_assert (done);
12577 break;
12578
12579 case op_vec_info_type:
12580 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12581 NULL);
12582 gcc_assert (done);
12583 break;
12584
12585 case assignment_vec_info_type:
12586 done = vectorizable_assignment (vinfo, stmt_info,
12587 gsi, &vec_stmt, slp_node, NULL);
12588 gcc_assert (done);
12589 break;
12590
12591 case load_vec_info_type:
12592 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12593 NULL);
12594 gcc_assert (done);
12595 break;
12596
12597 case store_vec_info_type:
12598 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
12599 && !slp_node
12600 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
12601 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
12602 /* In case of interleaving, the whole chain is vectorized when the
12603 last store in the chain is reached. Store stmts before the last
12604 one are skipped, and their vec_stmt_info shouldn't be freed
12605 meanwhile. */
12606 ;
12607 else
12608 {
12609 done = vectorizable_store (vinfo, stmt_info,
12610 gsi, &vec_stmt, slp_node, NULL);
12611 gcc_assert (done);
12612 is_store = true;
12613 }
12614 break;
12615
12616 case condition_vec_info_type:
12617 done = vectorizable_condition (vinfo, stmt_info,
12618 gsi, &vec_stmt, slp_node, NULL);
12619 gcc_assert (done);
12620 break;
12621
12622 case comparison_vec_info_type:
12623 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
12624 slp_node, NULL);
12625 gcc_assert (done);
12626 break;
12627
12628 case call_vec_info_type:
12629 done = vectorizable_call (vinfo, stmt_info,
12630 gsi, &vec_stmt, slp_node, NULL);
12631 break;
12632
12633 case call_simd_clone_vec_info_type:
12634 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
12635 slp_node, NULL);
12636 break;
12637
12638 case reduc_vec_info_type:
12639 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12640 gsi, &vec_stmt, slp_node);
12641 gcc_assert (done);
12642 break;
12643
12644 case cycle_phi_info_type:
12645 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
12646 &vec_stmt, slp_node, slp_node_instance);
12647 gcc_assert (done);
12648 break;
12649
12650 case lc_phi_info_type:
12651 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12652 stmt_info, &vec_stmt, slp_node);
12653 gcc_assert (done);
12654 break;
12655
12656 case recurr_info_type:
12657 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12658 stmt_info, &vec_stmt, slp_node, NULL);
12659 gcc_assert (done);
12660 break;
12661
12662 case phi_info_type:
12663 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
12664 gcc_assert (done);
12665 break;
12666
12667 default:
12668 if (!STMT_VINFO_LIVE_P (stmt_info))
12669 {
12670 if (dump_enabled_p ())
12671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12672 "stmt not supported.\n");
12673 gcc_unreachable ();
12674 }
12675 done = true;
12676 }
12677
12678 if (!slp_node && vec_stmt)
12679 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
12680
12681 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
12682 {
12683 /* Handle stmts whose DEF is used outside the loop-nest that is
12684 being vectorized. */
12685 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
12686 slp_node_instance, true, NULL);
12687 gcc_assert (done);
12688 }
12689
12690 if (slp_node)
12691 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12692
12693 return is_store;
12694 }
12695
12696
12697 /* Remove a group of stores (for SLP or interleaving), free their
12698 stmt_vec_info. */
12699
12700 void
12701 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
12702 {
12703 stmt_vec_info next_stmt_info = first_stmt_info;
12704
12705 while (next_stmt_info)
12706 {
12707 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
12708 next_stmt_info = vect_orig_stmt (next_stmt_info);
12709 /* Free the attached stmt_vec_info and remove the stmt. */
12710 vinfo->remove_stmt (next_stmt_info);
12711 next_stmt_info = tmp;
12712 }
12713 }
12714
12715 /* If NUNITS is nonzero, return a vector type that contains NUNITS
12716 elements of type SCALAR_TYPE, or null if the target doesn't support
12717 such a type.
12718
12719 If NUNITS is zero, return a vector type that contains elements of
12720 type SCALAR_TYPE, choosing whichever vector size the target prefers.
12721
12722 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
12723 for this vectorization region and want to "autodetect" the best choice.
12724 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
12725 and we want the new type to be interoperable with it. PREVAILING_MODE
12726 in this case can be a scalar integer mode or a vector mode; when it
12727 is a vector mode, the function acts like a tree-level version of
12728 related_vector_mode. */
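/* For example, assuming a target that supports 128-bit vectors, calling
   this with a PREVAILING_MODE of V16QImode, a SCALAR_TYPE of "int" and a
   NUNITS of zero would be expected to yield a four-element integer
   vector type (mode V4SImode).  */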
12729
12730 tree
12731 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
12732 tree scalar_type, poly_uint64 nunits)
12733 {
12734 tree orig_scalar_type = scalar_type;
12735 scalar_mode inner_mode;
12736 machine_mode simd_mode;
12737 tree vectype;
12738
12739 if ((!INTEGRAL_TYPE_P (scalar_type)
12740 && !POINTER_TYPE_P (scalar_type)
12741 && !SCALAR_FLOAT_TYPE_P (scalar_type))
12742 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
12743 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
12744 return NULL_TREE;
12745
12746 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
12747
12748 /* Interoperability between modes requires one to be a constant multiple
12749 of the other, so that the number of vectors required for each operation
12750 is a compile-time constant. */
12751 if (prevailing_mode != VOIDmode
12752 && !constant_multiple_p (nunits * nbytes,
12753 GET_MODE_SIZE (prevailing_mode))
12754 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
12755 nunits * nbytes))
12756 return NULL_TREE;
12757
12758 /* For vector types of elements whose mode precision doesn't
12759 match their type's precision we use an element type of mode
12760 precision. The vectorization routines will have to make sure
12761 they support the proper result truncation/extension.
12762 We also make sure to build vector types with INTEGER_TYPE
12763 component type only. */
12764 if (INTEGRAL_TYPE_P (scalar_type)
12765 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
12766 || TREE_CODE (scalar_type) != INTEGER_TYPE))
12767 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
12768 TYPE_UNSIGNED (scalar_type));
12769
12770 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
12771 When the component mode passes the above test simply use a type
12772 corresponding to that mode. The theory is that any use that
12773 would cause problems with this will disable vectorization anyway. */
12774 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
12775 && !INTEGRAL_TYPE_P (scalar_type))
12776 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
12777
12778 /* We can't build a vector type of elements with alignment bigger than
12779 their size. */
12780 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
12781 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
12782 TYPE_UNSIGNED (scalar_type));
12783
12784 /* If we fell back to using the mode, fail if there was
12785 no scalar type for it. */
12786 if (scalar_type == NULL_TREE)
12787 return NULL_TREE;
12788
12789 /* If no prevailing mode was supplied, use the mode the target prefers.
12790 Otherwise lookup a vector mode based on the prevailing mode. */
12791 if (prevailing_mode == VOIDmode)
12792 {
12793 gcc_assert (known_eq (nunits, 0U));
12794 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
12795 if (SCALAR_INT_MODE_P (simd_mode))
12796 {
12797 /* Traditional behavior is not to take the integer mode
12798 literally, but simply to use it as a way of determining
12799 the vector size. It is up to mode_for_vector to decide
12800 what the TYPE_MODE should be.
12801
12802 Note that nunits == 1 is allowed in order to support single
12803 element vector types. */
12804 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
12805 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12806 return NULL_TREE;
12807 }
12808 }
12809 else if (SCALAR_INT_MODE_P (prevailing_mode)
12810 || !related_vector_mode (prevailing_mode,
12811 inner_mode, nunits).exists (&simd_mode))
12812 {
12813 /* Fall back to using mode_for_vector, mostly in the hope of being
12814 able to use an integer mode. */
12815 if (known_eq (nunits, 0U)
12816 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
12817 return NULL_TREE;
12818
12819 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12820 return NULL_TREE;
12821 }
12822
12823 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
12824
12825 /* In cases where the mode was chosen by mode_for_vector, check that
12826 the target actually supports the chosen mode, or that it at least
12827 allows the vector mode to be replaced by a like-sized integer. */
12828 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
12829 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
12830 return NULL_TREE;
12831
12832 /* Re-attach the address-space qualifier if we canonicalized the scalar
12833 type. */
12834 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
12835 return build_qualified_type
12836 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
12837
12838 return vectype;
12839 }
12840
12841 /* Function get_vectype_for_scalar_type.
12842
12843 Returns the vector type corresponding to SCALAR_TYPE as supported
12844 by the target. If GROUP_SIZE is nonzero and we're performing BB
12845 vectorization, make sure that the number of elements in the vector
12846 is no bigger than GROUP_SIZE. */
12847
12848 tree
12849 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
12850 unsigned int group_size)
12851 {
12852 /* For BB vectorization, we should always have a group size once we've
12853 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12854 are tentative requests during things like early data reference
12855 analysis and pattern recognition. */
12856 if (is_a <bb_vec_info> (vinfo))
12857 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12858 else
12859 group_size = 0;
12860
12861 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12862 scalar_type);
12863 if (vectype && vinfo->vector_mode == VOIDmode)
12864 vinfo->vector_mode = TYPE_MODE (vectype);
12865
12866 /* Register the natural choice of vector type, before the group size
12867 has been applied. */
12868 if (vectype)
12869 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
12870
12871 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12872 try again with an explicit number of elements. */
12873 if (vectype
12874 && group_size
12875 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
12876 {
12877 /* Start with the biggest number of units that fits within
12878 GROUP_SIZE and halve it until we find a valid vector type.
12879 Usually either the first attempt will succeed or all will
12880 fail (in the latter case because GROUP_SIZE is too small
12881 for the target), but it's possible that a target could have
12882 a hole between supported vector types.
12883
12884 If GROUP_SIZE is not a power of 2, this has the effect of
12885 trying the largest power of 2 that fits within the group,
12886 even though the group is not a multiple of that vector size.
12887 The BB vectorizer will then try to carve up the group into
12888 smaller pieces. */
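/* For example, a GROUP_SIZE of 12 makes the loop below try 8 elements
 first, then 4, then 2, stopping at the first count for which the
 target provides a vector type.  */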
12889 unsigned int nunits = 1 << floor_log2 (group_size);
12890 do
12891 {
12892 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12893 scalar_type, nunits);
12894 nunits /= 2;
12895 }
12896 while (nunits > 1 && !vectype);
12897 }
12898
12899 return vectype;
12900 }
12901
12902 /* Return the vector type corresponding to SCALAR_TYPE as supported
12903 by the target. NODE, if nonnull, is the SLP tree node that will
12904 use the returned vector type. */
12905
12906 tree
12907 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
12908 {
12909 unsigned int group_size = 0;
12910 if (node)
12911 group_size = SLP_TREE_LANES (node);
12912 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12913 }
12914
12915 /* Function get_mask_type_for_scalar_type.
12916
12917 Returns the mask type corresponding to a result of comparison
12918 of vectors of specified SCALAR_TYPE as supported by target.
12919 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12920 make sure that the number of elements in the vector is no bigger
12921 than GROUP_SIZE. */
12922
12923 tree
12924 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12925 unsigned int group_size)
12926 {
12927 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12928
12929 if (!vectype)
12930 return NULL;
12931
12932 return truth_type_for (vectype);
12933 }
12934
12935 /* Function get_same_sized_vectype
12936
12937 Returns a vector type corresponding to SCALAR_TYPE of size
12938 VECTOR_TYPE if supported by the target. */
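/* For example, given a four-element float VECTOR_TYPE and a "double"
   SCALAR_TYPE, this would return a two-element double vector type of
   the same 16-byte size, assuming the target supports such a mode.  */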
12939
12940 tree
12941 get_same_sized_vectype (tree scalar_type, tree vector_type)
12942 {
12943 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12944 return truth_type_for (vector_type);
12945
12946 poly_uint64 nunits;
12947 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12948 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12949 return NULL_TREE;
12950
12951 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12952 scalar_type, nunits);
12953 }
12954
12955 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12956 would not change the chosen vector modes. */
12957
12958 bool
12959 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12960 {
12961 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12962 i != vinfo->used_vector_modes.end (); ++i)
12963 if (!VECTOR_MODE_P (*i)
12964 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12965 return false;
12966 return true;
12967 }
12968
12969 /* Function vect_is_simple_use.
12970
12971 Input:
12972 VINFO - the vect info of the loop or basic block that is being vectorized.
12973 OPERAND - operand in the loop or bb.
12974 Output:
12975 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12976 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12977 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12978 the definition could be anywhere in the function
12979 DT - the type of definition
12980
12981 Returns whether a stmt with OPERAND can be vectorized.
12982 For loops, supportable operands are constants, loop invariants, and operands
12983 that are defined by the current iteration of the loop. Unsupportable
12984 operands are those that are defined by a previous iteration of the loop (as
12985 is the case in reduction/induction computations).
12986 For basic blocks, supportable operands are constants and bb invariants.
12987 For now, operands defined outside the basic block are not supported. */
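/* As a rough illustration, in a loop like
   for (i = 0; i < n; i++) a[i] = b[i] * 2 + x;
   the constant 2 is vect_constant_def, the loop-invariant X is
   vect_external_def, the SSA name holding the loaded b[i] is
   vect_internal_def, and the induction variable I is vect_induction_def.  */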
12988
12989 bool
12990 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12991 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12992 {
12993 if (def_stmt_info_out)
12994 *def_stmt_info_out = NULL;
12995 if (def_stmt_out)
12996 *def_stmt_out = NULL;
12997 *dt = vect_unknown_def_type;
12998
12999 if (dump_enabled_p ())
13000 {
13001 dump_printf_loc (MSG_NOTE, vect_location,
13002 "vect_is_simple_use: operand ");
13003 if (TREE_CODE (operand) == SSA_NAME
13004 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13005 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13006 else
13007 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13008 }
13009
13010 if (CONSTANT_CLASS_P (operand))
13011 *dt = vect_constant_def;
13012 else if (is_gimple_min_invariant (operand))
13013 *dt = vect_external_def;
13014 else if (TREE_CODE (operand) != SSA_NAME)
13015 *dt = vect_unknown_def_type;
13016 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13017 *dt = vect_external_def;
13018 else
13019 {
13020 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13021 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13022 if (!stmt_vinfo)
13023 *dt = vect_external_def;
13024 else
13025 {
13026 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13027 def_stmt = stmt_vinfo->stmt;
13028 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13029 if (def_stmt_info_out)
13030 *def_stmt_info_out = stmt_vinfo;
13031 }
13032 if (def_stmt_out)
13033 *def_stmt_out = def_stmt;
13034 }
13035
13036 if (dump_enabled_p ())
13037 {
13038 dump_printf (MSG_NOTE, ", type of def: ");
13039 switch (*dt)
13040 {
13041 case vect_uninitialized_def:
13042 dump_printf (MSG_NOTE, "uninitialized\n");
13043 break;
13044 case vect_constant_def:
13045 dump_printf (MSG_NOTE, "constant\n");
13046 break;
13047 case vect_external_def:
13048 dump_printf (MSG_NOTE, "external\n");
13049 break;
13050 case vect_internal_def:
13051 dump_printf (MSG_NOTE, "internal\n");
13052 break;
13053 case vect_induction_def:
13054 dump_printf (MSG_NOTE, "induction\n");
13055 break;
13056 case vect_reduction_def:
13057 dump_printf (MSG_NOTE, "reduction\n");
13058 break;
13059 case vect_double_reduction_def:
13060 dump_printf (MSG_NOTE, "double reduction\n");
13061 break;
13062 case vect_nested_cycle:
13063 dump_printf (MSG_NOTE, "nested cycle\n");
13064 break;
13065 case vect_first_order_recurrence:
13066 dump_printf (MSG_NOTE, "first order recurrence\n");
13067 break;
13068 case vect_unknown_def_type:
13069 dump_printf (MSG_NOTE, "unknown\n");
13070 break;
13071 }
13072 }
13073
13074 if (*dt == vect_unknown_def_type)
13075 {
13076 if (dump_enabled_p ())
13077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13078 "Unsupported pattern.\n");
13079 return false;
13080 }
13081
13082 return true;
13083 }
13084
13085 /* Function vect_is_simple_use.
13086
13087 Same as vect_is_simple_use but also determines the vector operand
13088 type of OPERAND and stores it to *VECTYPE. If the definition of
13089 OPERAND is vect_uninitialized_def, vect_constant_def or
13090 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13091 is responsible to compute the best suited vector type for the
13092 scalar operand. */
13093
13094 bool
13095 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13096 tree *vectype, stmt_vec_info *def_stmt_info_out,
13097 gimple **def_stmt_out)
13098 {
13099 stmt_vec_info def_stmt_info;
13100 gimple *def_stmt;
13101 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13102 return false;
13103
13104 if (def_stmt_out)
13105 *def_stmt_out = def_stmt;
13106 if (def_stmt_info_out)
13107 *def_stmt_info_out = def_stmt_info;
13108
13109 /* Now get a vector type if the def is internal, otherwise supply
13110 NULL_TREE and leave it up to the caller to figure out a proper
13111 type for the use stmt. */
13112 if (*dt == vect_internal_def
13113 || *dt == vect_induction_def
13114 || *dt == vect_reduction_def
13115 || *dt == vect_double_reduction_def
13116 || *dt == vect_nested_cycle
13117 || *dt == vect_first_order_recurrence)
13118 {
13119 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13120 gcc_assert (*vectype != NULL_TREE);
13121 if (dump_enabled_p ())
13122 dump_printf_loc (MSG_NOTE, vect_location,
13123 "vect_is_simple_use: vectype %T\n", *vectype);
13124 }
13125 else if (*dt == vect_uninitialized_def
13126 || *dt == vect_constant_def
13127 || *dt == vect_external_def)
13128 *vectype = NULL_TREE;
13129 else
13130 gcc_unreachable ();
13131
13132 return true;
13133 }
13134
13135 /* Function vect_is_simple_use.
13136
13137 Same as vect_is_simple_use but determines the operand by operand
13138 position OPERAND from either STMT or SLP_NODE, filling in *OP
13139 and *SLP_DEF (when SLP_NODE is not NULL). */
13140
13141 bool
13142 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13143 unsigned operand, tree *op, slp_tree *slp_def,
13144 enum vect_def_type *dt,
13145 tree *vectype, stmt_vec_info *def_stmt_info_out)
13146 {
13147 if (slp_node)
13148 {
13149 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13150 *slp_def = child;
13151 *vectype = SLP_TREE_VECTYPE (child);
13152 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13153 {
13154 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13155 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13156 }
13157 else
13158 {
13159 if (def_stmt_info_out)
13160 *def_stmt_info_out = NULL;
13161 *op = SLP_TREE_SCALAR_OPS (child)[0];
13162 *dt = SLP_TREE_DEF_TYPE (child);
13163 return true;
13164 }
13165 }
13166 else
13167 {
13168 *slp_def = NULL;
13169 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13170 {
13171 if (gimple_assign_rhs_code (ass) == COND_EXPR
13172 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13173 {
13174 if (operand < 2)
13175 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13176 else
13177 *op = gimple_op (ass, operand);
13178 }
13179 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13180 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13181 else
13182 *op = gimple_op (ass, operand + 1);
13183 }
13184 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13185 *op = gimple_call_arg (call, operand);
13186 else
13187 gcc_unreachable ();
13188 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13189 }
13190 }
13191
13192 /* If OP is not NULL and is external or constant, update its vector
13193 type with VECTYPE. Returns true if successful or false if not,
13194 for example when conflicting vector types are present. */
13195
13196 bool
13197 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13198 {
13199 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13200 return true;
13201 if (SLP_TREE_VECTYPE (op))
13202 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13203 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13204 should be handled by patterns. Allow vect_constant_def for now. */
13205 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13206 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13207 return false;
13208 SLP_TREE_VECTYPE (op) = vectype;
13209 return true;
13210 }
13211
13212 /* Function supportable_widening_operation
13213
13214 Check whether an operation represented by the code CODE is a
13215 widening operation that is supported by the target platform in
13216 vector form (i.e., when operating on arguments of type VECTYPE_IN
13217 producing a result of type VECTYPE_OUT).
13218
13219 Widening operations we currently support are NOP (CONVERT), FLOAT,
13220 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13221 are supported by the target platform either directly (via vector
13222 tree-codes), or via target builtins.
13223
13224 Output:
13225 - CODE1 and CODE2 are codes of vector operations to be used when
13226 vectorizing the operation, if available.
13227 - MULTI_STEP_CVT determines the number of required intermediate steps in
13228 case of multi-step conversion (like char->short->int - in that case
13229 MULTI_STEP_CVT will be 1).
13230 - INTERM_TYPES contains the intermediate type required to perform the
13231 widening operation (short in the above example). */
13232
13233 bool
13234 supportable_widening_operation (vec_info *vinfo,
13235 code_helper code,
13236 stmt_vec_info stmt_info,
13237 tree vectype_out, tree vectype_in,
13238 code_helper *code1,
13239 code_helper *code2,
13240 int *multi_step_cvt,
13241 vec<tree> *interm_types)
13242 {
13243 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13244 class loop *vect_loop = NULL;
13245 machine_mode vec_mode;
13246 enum insn_code icode1, icode2;
13247 optab optab1 = unknown_optab, optab2 = unknown_optab;
13248 tree vectype = vectype_in;
13249 tree wide_vectype = vectype_out;
13250 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13251 int i;
13252 tree prev_type, intermediate_type;
13253 machine_mode intermediate_mode, prev_mode;
13254 optab optab3, optab4;
13255
13256 *multi_step_cvt = 0;
13257 if (loop_info)
13258 vect_loop = LOOP_VINFO_LOOP (loop_info);
13259
13260 switch (code.safe_as_tree_code ())
13261 {
13262 case MAX_TREE_CODES:
13263 /* Don't set c1 and c2 if code is not a tree_code. */
13264 break;
13265
13266 case WIDEN_MULT_EXPR:
13267 /* The result of a vectorized widening operation usually requires
13268 two vectors (because the widened results do not fit into one vector).
13269 The generated vector results would normally be expected to be
13270 generated in the same order as in the original scalar computation,
13271 i.e. if 8 results are generated in each vector iteration, they are
13272 to be organized as follows:
13273 vect1: [res1,res2,res3,res4],
13274 vect2: [res5,res6,res7,res8].
13275
13276 However, in the special case that the result of the widening
13277 operation is used in a reduction computation only, the order doesn't
13278 matter (because when vectorizing a reduction we change the order of
13279 the computation). Some targets can take advantage of this and
13280 generate more efficient code. For example, targets like Altivec,
13281 that support widen_mult using a sequence of {mult_even,mult_odd}
13282 generate the following vectors:
13283 vect1: [res1,res3,res5,res7],
13284 vect2: [res2,res4,res6,res8].
13285
13286 When vectorizing outer-loops, we execute the inner-loop sequentially
13287 (each vectorized inner-loop iteration contributes to VF outer-loop
13288 iterations in parallel). We therefore don't allow changing the
13289 order of the computation in the inner-loop during outer-loop
13290 vectorization. */
13291 /* TODO: Another case in which order doesn't *really* matter is when we
13292 widen and then contract again, e.g. (short)((int)x * y >> 8).
13293 Normally, pack_trunc performs an even/odd permute, whereas the
13294 repack from an even/odd expansion would be an interleave, which
13295 would be significantly simpler for e.g. AVX2. */
13296 /* In any case, in order to avoid duplicating the code below, recurse
13297 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13298 are properly set up for the caller. If we fail, we'll continue with
13299 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13300 if (vect_loop
13301 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13302 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13303 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13304 stmt_info, vectype_out,
13305 vectype_in, code1,
13306 code2, multi_step_cvt,
13307 interm_types))
13308 {
13309 /* Elements in a vector with vect_used_by_reduction property cannot
13310 be reordered if the use chain with this property does not have the
13311 same operation. One such example is s += a * b, where elements
13312 in a and b cannot be reordered. Here we check if the vector defined
13313 by STMT is only directly used in the reduction statement. */
13314 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13315 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13316 if (use_stmt_info
13317 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13318 return true;
13319 }
13320 c1 = VEC_WIDEN_MULT_LO_EXPR;
13321 c2 = VEC_WIDEN_MULT_HI_EXPR;
13322 break;
13323
13324 case DOT_PROD_EXPR:
13325 c1 = DOT_PROD_EXPR;
13326 c2 = DOT_PROD_EXPR;
13327 break;
13328
13329 case SAD_EXPR:
13330 c1 = SAD_EXPR;
13331 c2 = SAD_EXPR;
13332 break;
13333
13334 case VEC_WIDEN_MULT_EVEN_EXPR:
13335 /* Support the recursion induced just above. */
13336 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13337 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13338 break;
13339
13340 case WIDEN_LSHIFT_EXPR:
13341 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13342 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13343 break;
13344
13345 CASE_CONVERT:
13346 c1 = VEC_UNPACK_LO_EXPR;
13347 c2 = VEC_UNPACK_HI_EXPR;
13348 break;
13349
13350 case FLOAT_EXPR:
13351 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13352 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13353 break;
13354
13355 case FIX_TRUNC_EXPR:
13356 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13357 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13358 break;
13359
13360 default:
13361 gcc_unreachable ();
13362 }
13363
13364 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13365 std::swap (c1, c2);
13366
13367 if (code == FIX_TRUNC_EXPR)
13368 {
13369 /* The signedness is determined from the output operand. */
13370 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13371 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13372 }
13373 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13374 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13375 && VECTOR_BOOLEAN_TYPE_P (vectype)
13376 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13377 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13378 {
13379 /* If the input and result modes are the same, a different optab
13380 is needed where we pass in the number of units in vectype. */
13381 optab1 = vec_unpacks_sbool_lo_optab;
13382 optab2 = vec_unpacks_sbool_hi_optab;
13383 }
13384
13385 vec_mode = TYPE_MODE (vectype);
13386 if (widening_fn_p (code))
13387 {
13388 /* If this is an internal fn then we must check whether the target
13389 supports either a low-high split or an even-odd split. */
13390 internal_fn ifn = as_internal_fn ((combined_fn) code);
13391
13392 internal_fn lo, hi, even, odd;
13393 lookup_hilo_internal_fn (ifn, &lo, &hi);
13394 *code1 = as_combined_fn (lo);
13395 *code2 = as_combined_fn (hi);
13396 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13397 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13398
13399 /* If we don't support low-high, then check for even-odd. */
13400 if (!optab1
13401 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13402 || !optab2
13403 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13404 {
13405 lookup_evenodd_internal_fn (ifn, &even, &odd);
13406 *code1 = as_combined_fn (even);
13407 *code2 = as_combined_fn (odd);
13408 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13409 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13410 }
13411 }
13412 else if (code.is_tree_code ())
13413 {
13414 if (code == FIX_TRUNC_EXPR)
13415 {
13416 /* The signedness is determined from the output operand. */
13417 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13418 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13419 }
13420 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13421 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13422 && VECTOR_BOOLEAN_TYPE_P (vectype)
13423 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13424 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13425 {
13426 /* If the input and result modes are the same, a different optab
13427 is needed where we pass in the number of units in vectype. */
13428 optab1 = vec_unpacks_sbool_lo_optab;
13429 optab2 = vec_unpacks_sbool_hi_optab;
13430 }
13431 else
13432 {
13433 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13434 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13435 }
13436 *code1 = c1;
13437 *code2 = c2;
13438 }
13439
13440 if (!optab1 || !optab2)
13441 return false;
13442
13443 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13444 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13445 return false;
13446
13447
13448 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13449 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13450 {
13451 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13452 return true;
13453 /* For scalar masks we may have different boolean
13454 vector types having the same QImode. Thus we
13455 add an additional check on the number of elements. */
13456 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13457 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13458 return true;
13459 }
13460
13461 /* Check if it's a multi-step conversion that can be done using intermediate
13462 types. */
13463
13464 prev_type = vectype;
13465 prev_mode = vec_mode;
13466
13467 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13468 return false;
13469
13470 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13471 intermediate steps in the promotion sequence. We try
13472 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13473 not. */
13474 interm_types->create (MAX_INTERM_CVT_STEPS);
13475 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13476 {
13477 intermediate_mode = insn_data[icode1].operand[0].mode;
13478 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13479 intermediate_type
13480 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13481 else if (VECTOR_MODE_P (intermediate_mode))
13482 {
13483 tree intermediate_element_type
13484 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13485 TYPE_UNSIGNED (prev_type));
13486 intermediate_type
13487 = build_vector_type_for_mode (intermediate_element_type,
13488 intermediate_mode);
13489 }
13490 else
13491 intermediate_type
13492 = lang_hooks.types.type_for_mode (intermediate_mode,
13493 TYPE_UNSIGNED (prev_type));
13494
13495 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13496 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13497 && intermediate_mode == prev_mode
13498 && SCALAR_INT_MODE_P (prev_mode))
13499 {
13500 /* If the input and result modes are the same, a different optab
13501 is needed where we pass in the number of units in vectype. */
13502 optab3 = vec_unpacks_sbool_lo_optab;
13503 optab4 = vec_unpacks_sbool_hi_optab;
13504 }
13505 else
13506 {
13507 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13508 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13509 }
13510
13511 if (!optab3 || !optab4
13512 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13513 || insn_data[icode1].operand[0].mode != intermediate_mode
13514 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13515 || insn_data[icode2].operand[0].mode != intermediate_mode
13516 || ((icode1 = optab_handler (optab3, intermediate_mode))
13517 == CODE_FOR_nothing)
13518 || ((icode2 = optab_handler (optab4, intermediate_mode))
13519 == CODE_FOR_nothing))
13520 break;
13521
13522 interm_types->quick_push (intermediate_type);
13523 (*multi_step_cvt)++;
13524
13525 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13526 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13527 {
13528 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13529 return true;
13530 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13531 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13532 return true;
13533 }
13534
13535 prev_type = intermediate_type;
13536 prev_mode = intermediate_mode;
13537 }
13538
13539 interm_types->release ();
13540 return false;
13541 }
13542
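/* Editorial example (not part of the original source): a concrete instance
   of the single-step and multi-step widening cases handled above, assuming
   128-bit vectors and a target that provides the standard vec_unpack optabs.

   - Single step: CASE_CONVERT from vector(8) short to vector(4) int selects
     c1/c2 = VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR (swapped on big-endian
     targets); the optab handlers already produce TYPE_MODE (wide_vectype),
     so the function returns true without pushing any intermediate types.

   - Multi step: CASE_CONVERT from vector(16) char to vector(4) int first
     unpacks to an intermediate vector(8) short type; the loop above pushes
     that type onto INTERM_TYPES and increments *MULTI_STEP_CVT once before
     returning true, mirroring the int->short->char example given for the
     narrowing case below.  */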
13543
13544 /* Function supportable_narrowing_operation
13545
13546 Check whether an operation represented by the code CODE is a
13547 narrowing operation that is supported by the target platform in
13548 vector form (i.e., when operating on arguments of type VECTYPE_IN
13549 and producing a result of type VECTYPE_OUT).
13550
13551 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13552 and FLOAT. This function checks if these operations are supported by
13553 the target platform directly via vector tree-codes.
13554
13555 Output:
13556 - CODE1 is the code of a vector operation to be used when
13557 vectorizing the operation, if available.
13558 - MULTI_STEP_CVT determines the number of required intermediate steps in
13559 case of multi-step conversion (like int->short->char - in that case
13560 MULTI_STEP_CVT will be 1).
13561 - INTERM_TYPES contains the intermediate type required to perform the
13562 narrowing operation (short in the above example). */
13563
13564 bool
13565 supportable_narrowing_operation (code_helper code,
13566 tree vectype_out, tree vectype_in,
13567 code_helper *code1, int *multi_step_cvt,
13568 vec<tree> *interm_types)
13569 {
13570 machine_mode vec_mode;
13571 enum insn_code icode1;
13572 optab optab1, interm_optab;
13573 tree vectype = vectype_in;
13574 tree narrow_vectype = vectype_out;
13575 enum tree_code c1;
13576 tree intermediate_type, prev_type;
13577 machine_mode intermediate_mode, prev_mode;
13578 int i;
13579 unsigned HOST_WIDE_INT n_elts;
13580 bool uns;
13581
13582 if (!code.is_tree_code ())
13583 return false;
13584
13585 *multi_step_cvt = 0;
13586 switch ((tree_code) code)
13587 {
13588 CASE_CONVERT:
13589 c1 = VEC_PACK_TRUNC_EXPR;
13590 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13591 && VECTOR_BOOLEAN_TYPE_P (vectype)
13592 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13593 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13594 && n_elts < BITS_PER_UNIT)
13595 optab1 = vec_pack_sbool_trunc_optab;
13596 else
13597 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13598 break;
13599
13600 case FIX_TRUNC_EXPR:
13601 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13602 /* The signedness is determined from the output operand. */
13603 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13604 break;
13605
13606 case FLOAT_EXPR:
13607 c1 = VEC_PACK_FLOAT_EXPR;
13608 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13609 break;
13610
13611 default:
13612 gcc_unreachable ();
13613 }
13614
13615 if (!optab1)
13616 return false;
13617
13618 vec_mode = TYPE_MODE (vectype);
13619 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
13620 return false;
13621
13622 *code1 = c1;
13623
13624 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13625 {
13626 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13627 return true;
13628 /* For scalar masks we may have different boolean
13629 vector types having the same QImode. Thus we
13630 add an additional check on the number of elements. */
13631 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
13632 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13633 return true;
13634 }
13635
13636 if (code == FLOAT_EXPR)
13637 return false;
13638
13639 /* Check if it's a multi-step conversion that can be done using intermediate
13640 types. */
13641 prev_mode = vec_mode;
13642 prev_type = vectype;
13643 if (code == FIX_TRUNC_EXPR)
13644 uns = TYPE_UNSIGNED (vectype_out);
13645 else
13646 uns = TYPE_UNSIGNED (vectype);
13647
13648 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
13649 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
13650 more costly than signed. */
13651 if (code == FIX_TRUNC_EXPR && uns)
13652 {
13653 enum insn_code icode2;
13654
13655 intermediate_type
13656 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
13657 interm_optab
13658 = optab_for_tree_code (c1, intermediate_type, optab_default);
13659 if (interm_optab != unknown_optab
13660 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
13661 && insn_data[icode1].operand[0].mode
13662 == insn_data[icode2].operand[0].mode)
13663 {
13664 uns = false;
13665 optab1 = interm_optab;
13666 icode1 = icode2;
13667 }
13668 }
13669
13670 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13671 intermediate steps in the narrowing sequence. We try
13672 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
13673 interm_types->create (MAX_INTERM_CVT_STEPS);
13674 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13675 {
13676 intermediate_mode = insn_data[icode1].operand[0].mode;
13677 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13678 intermediate_type
13679 = vect_double_mask_nunits (prev_type, intermediate_mode);
13680 else
13681 intermediate_type
13682 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
13683 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13684 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13685 && SCALAR_INT_MODE_P (prev_mode)
13686 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
13687 && n_elts < BITS_PER_UNIT)
13688 interm_optab = vec_pack_sbool_trunc_optab;
13689 else
13690 interm_optab
13691 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
13692 optab_default);
13693 if (!interm_optab
13694 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
13695 || insn_data[icode1].operand[0].mode != intermediate_mode
13696 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
13697 == CODE_FOR_nothing))
13698 break;
13699
13700 interm_types->quick_push (intermediate_type);
13701 (*multi_step_cvt)++;
13702
13703 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13704 {
13705 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13706 return true;
13707 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
13708 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13709 return true;
13710 }
13711
13712 prev_mode = intermediate_mode;
13713 prev_type = intermediate_type;
13714 optab1 = interm_optab;
13715 }
13716
13717 interm_types->release ();
13718 return false;
13719 }
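
/* Editorial sketch (not part of the original source): a minimal caller of
   supportable_narrowing_operation above, using the int->short->char example
   from its documentation.  V16QI_TYPE and V4SI_TYPE are hypothetical
   placeholders for the vector(16) char and vector(4) int types:

     code_helper code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     bool ok = supportable_narrowing_operation (NOP_EXPR, V16QI_TYPE,
                                                V4SI_TYPE, &code1,
                                                &multi_step_cvt,
                                                &interm_types);

   On a target that provides the vec_pack_trunc optabs, OK is expected to be
   true with code1 == VEC_PACK_TRUNC_EXPR, multi_step_cvt == 1 and
   interm_types holding the vector(8) short intermediate type; the caller
   owns INTERM_TYPES and releases it when done.  */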
13720
13721 /* Generate and return a vector mask of MASK_TYPE such that
13722 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
13723 Add the statements to SEQ. */
13724
13725 tree
13726 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
13727 tree end_index, const char *name)
13728 {
13729 tree cmp_type = TREE_TYPE (start_index);
13730 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
13731 cmp_type, mask_type,
13732 OPTIMIZE_FOR_SPEED));
13733 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
13734 start_index, end_index,
13735 build_zero_cst (mask_type));
13736 tree tmp;
13737 if (name)
13738 tmp = make_temp_ssa_name (mask_type, NULL, name);
13739 else
13740 tmp = make_ssa_name (mask_type);
13741 gimple_call_set_lhs (call, tmp);
13742 gimple_seq_add_stmt (seq, call);
13743 return tmp;
13744 }
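
/* Editorial sketch (not part of the original source): the mask produced by
   vect_gen_while above is an all-true prefix.  MASK_TYPE, START_INDEX and
   END_INDEX below are hypothetical trees, and the checking assert requires
   the target to support IFN_WHILE_ULT for the given pair of types:

     gimple_seq seq = NULL;
     tree mask = vect_gen_while (&seq, mask_type, start_index, end_index,
                                 "loop_mask");

   For start_index = 13, end_index = 16 and an 8-element mask type the
   IFN_WHILE_ULT result is conceptually
   { 13 < 16, 14 < 16, 15 < 16, 16 < 16, ... } = { 1, 1, 1, 0, 0, 0, 0, 0 },
   matching the "J + START_INDEX < END_INDEX for all J <= I" description.  */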
13745
13746 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
13747 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
13748
13749 tree
13750 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
13751 tree end_index)
13752 {
13753 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
13754 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
13755 }
13756
13757 /* Try to compute the vector types required to vectorize STMT_INFO,
13758 returning true on success and false if vectorization isn't possible.
13759 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13760 make sure that the number of elements in the vectors is no bigger
13761 than GROUP_SIZE.
13762
13763 On success:
13764
13765 - Set *STMT_VECTYPE_OUT to:
13766 - NULL_TREE if the statement doesn't need to be vectorized;
13767 - the equivalent of STMT_VINFO_VECTYPE otherwise.
13768
13769 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
13770 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
13771 statement does not help to determine the overall number of units. */
13772
13773 opt_result
13774 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
13775 tree *stmt_vectype_out,
13776 tree *nunits_vectype_out,
13777 unsigned int group_size)
13778 {
13779 gimple *stmt = stmt_info->stmt;
13780
13781 /* For BB vectorization, we should always have a group size once we've
13782 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13783 are tentative requests during things like early data reference
13784 analysis and pattern recognition. */
13785 if (is_a <bb_vec_info> (vinfo))
13786 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13787 else
13788 group_size = 0;
13789
13790 *stmt_vectype_out = NULL_TREE;
13791 *nunits_vectype_out = NULL_TREE;
13792
13793 if (gimple_get_lhs (stmt) == NULL_TREE
13794 /* MASK_STORE has no lhs, but is ok. */
13795 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
13796 {
13797 if (is_a <gcall *> (stmt))
13798 {
13799 /* Ignore calls with no lhs. These must be calls to
13800 #pragma omp simd functions, and what vectorization factor
13801 it really needs can't be determined until
13802 vectorizable_simd_clone_call. */
13803 if (dump_enabled_p ())
13804 dump_printf_loc (MSG_NOTE, vect_location,
13805 "defer to SIMD clone analysis.\n");
13806 return opt_result::success ();
13807 }
13808
13809 return opt_result::failure_at (stmt,
13810 "not vectorized: irregular stmt.%G", stmt);
13811 }
13812
13813 tree vectype;
13814 tree scalar_type = NULL_TREE;
13815 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
13816 {
13817 vectype = STMT_VINFO_VECTYPE (stmt_info);
13818 if (dump_enabled_p ())
13819 dump_printf_loc (MSG_NOTE, vect_location,
13820 "precomputed vectype: %T\n", vectype);
13821 }
13822 else if (vect_use_mask_type_p (stmt_info))
13823 {
13824 unsigned int precision = stmt_info->mask_precision;
13825 scalar_type = build_nonstandard_integer_type (precision, 1);
13826 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
13827 if (!vectype)
13828 return opt_result::failure_at (stmt, "not vectorized: unsupported"
13829 " data-type %T\n", scalar_type);
13830 if (dump_enabled_p ())
13831 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13832 }
13833 else
13834 {
13835 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
13836 scalar_type = TREE_TYPE (DR_REF (dr));
13837 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
13838 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
13839 else
13840 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
13841
13842 if (dump_enabled_p ())
13843 {
13844 if (group_size)
13845 dump_printf_loc (MSG_NOTE, vect_location,
13846 "get vectype for scalar type (group size %d):"
13847 " %T\n", group_size, scalar_type);
13848 else
13849 dump_printf_loc (MSG_NOTE, vect_location,
13850 "get vectype for scalar type: %T\n", scalar_type);
13851 }
13852 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13853 if (!vectype)
13854 return opt_result::failure_at (stmt,
13855 "not vectorized:"
13856 " unsupported data-type %T\n",
13857 scalar_type);
13858
13859 if (dump_enabled_p ())
13860 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13861 }
13862
13863 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
13864 return opt_result::failure_at (stmt,
13865 "not vectorized: vector stmt in loop:%G",
13866 stmt);
13867
13868 *stmt_vectype_out = vectype;
13869
13870 /* Don't try to compute scalar types if the stmt produces a boolean
13871 vector; use the existing vector type instead. */
13872 tree nunits_vectype = vectype;
13873 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13874 {
13875 /* The number of units is set according to the smallest scalar
13876 type (or the largest vector size, but we only support one
13877 vector size per vectorization). */
13878 scalar_type = vect_get_smallest_scalar_type (stmt_info,
13879 TREE_TYPE (vectype));
13880 if (scalar_type != TREE_TYPE (vectype))
13881 {
13882 if (dump_enabled_p ())
13883 dump_printf_loc (MSG_NOTE, vect_location,
13884 "get vectype for smallest scalar type: %T\n",
13885 scalar_type);
13886 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
13887 group_size);
13888 if (!nunits_vectype)
13889 return opt_result::failure_at
13890 (stmt, "not vectorized: unsupported data-type %T\n",
13891 scalar_type);
13892 if (dump_enabled_p ())
13893 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
13894 nunits_vectype);
13895 }
13896 }
13897
13898 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
13899 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
13900 return opt_result::failure_at (stmt,
13901 "Not vectorized: Incompatible number "
13902 "of vector subparts between %T and %T\n",
13903 nunits_vectype, *stmt_vectype_out);
13904
13905 if (dump_enabled_p ())
13906 {
13907 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
13908 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
13909 dump_printf (MSG_NOTE, "\n");
13910 }
13911
13912 *nunits_vectype_out = nunits_vectype;
13913 return opt_result::success ();
13914 }
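
/* Editorial sketch (not part of the original source): a hypothetical caller
   of vect_get_vector_types_for_stmt above, in the style of the analysis
   code that consumes it:

     tree stmt_vectype, nunits_vectype;
     opt_result res
       = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
                                         &nunits_vectype, 0);
     if (!res)
       return res;
     if (stmt_vectype)
       STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;

   A NULL stmt_vectype means the statement does not itself need to be
   vectorized (for instance a call with no lhs that is deferred to SIMD
   clone analysis), as described in the comment before the function.  */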
13915
13916 /* Generate and return a statement sequence that sets vector length LEN that is:
13917
13918 min_of_start_and_end = min (START_INDEX, END_INDEX);
13919 left_len = END_INDEX - min_of_start_and_end;
13920 rhs = min (left_len, LEN_LIMIT);
13921 LEN = rhs;
13922
13923 Note: the cost of the code generated by this function is modeled
13924 by vect_estimate_min_profitable_iters, so changes here may need
13925 corresponding changes there. */
13926
13927 gimple_seq
13928 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
13929 {
13930 gimple_seq stmts = NULL;
13931 tree len_type = TREE_TYPE (len);
13932 gcc_assert (TREE_TYPE (start_index) == len_type);
13933
13934 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
13935 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
13936 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
13937 gimple* stmt = gimple_build_assign (len, rhs);
13938 gimple_seq_add_stmt (&stmts, stmt);
13939
13940 return stmts;
13941 }
13942
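/* Editorial worked example (not part of the original source) for the
   formula implemented by vect_gen_len above, with hypothetical values
   START_INDEX = 24, END_INDEX = 31 and LEN_LIMIT = 16:

     min_of_start_and_end = min (24, 31) = 24
     left_len             = 31 - 24      = 7
     LEN                  = min (7, 16)  = 7

   i.e. the generated statements clamp the number of remaining scalar
   iterations to the maximum length supported by the target's
   length-controlled vector operations.  */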