gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
60
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
63
64 /* Return the vectorized type for the given statement. */
65
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
68 {
69 return STMT_VINFO_VECTYPE (stmt_info);
70 }
71
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
76 {
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
81
82 if (!loop_vinfo)
83 return false;
84
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
86
87 return (bb->loop_father == loop->inner);
88 }
89
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
93
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
100 {
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
107
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
111
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 }
115
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
121 {
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
124 }
125
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
131 {
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
134 }
135
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
140 {
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
145 }
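
/* For illustration: a typical caller might record the body cost of NCOPIES
   vector statements roughly like this (NCOPIES, STMT_INFO and VECTYPE are
   assumed to be values the caller already has):

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned inside_cost
       = record_stmt_cost (&cost_vec, ncopies, vector_stmt,
                           stmt_info, vectype, 0, vect_body);

   Loads and stores recorded as vector_load/vector_store are re-classified
   as gather/scatter by the worker above when STMT_VINFO_GATHER_SCATTER_P
   is set on STMT_INFO.  */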
146
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
148
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
151 {
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
154 }
155
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
160
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
165 {
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
168
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
175
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
180
181 return vect_name;
182 }
183
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
187
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
192 {
193 tree array_ref;
194 gimple *new_stmt;
195
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
199
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202 }
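
/* For illustration, a hypothetical two-vector load-lanes group uses the
   helpers above roughly as follows in the generated GIMPLE:

     vect__1 = vect_array[0];     <-- read_vector_array
     vect__2 = vect_array[1];     <-- read_vector_array

   while write_vector_array emits the mirrored "vect_array[N] = vect_x"
   stores that feed a subsequent .STORE_LANES call.  */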
203
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
207
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
210 {
211 tree mem_ref;
212
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
217 }
218
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
221
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
225 {
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229 }
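
/* For illustration, the clobber generated above looks roughly like

     vect_array = {CLOBBER};

   in the GIMPLE dump, telling later passes that the temporary is dead
   from this point on.  */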
230
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
232
233 /* Function vect_mark_relevant.
234
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
240 {
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
243
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
248
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern; in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
254 {
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
259
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
264
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
270
271 if (live_p && relevant == vect_unused_in_scope)
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
278 }
279
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
284 }
285
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
289
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
292 {
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
297 }
298
299 worklist->safe_push (stmt_info);
300 }
301
302
303 /* Function is_simple_and_all_uses_invariant
304
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
306
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
310 {
311 tree op;
312 ssa_op_iter iter;
313
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
317
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
319 {
320 enum vect_def_type dt = vect_uninitialized_def;
321
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
323 {
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
328 }
329
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
332 }
333 return true;
334 }
335
336 /* Function vect_stmt_relevant_p.
337
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
340
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - it is a control stmt in the loop (other than the exit condition).
345 - it is an induction and we have multiple exits.
346
347 CHECKME: what other side effects would the vectorizer allow? */
348
349 static bool
350 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
351 enum vect_relevant *relevant, bool *live_p)
352 {
353 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
354 ssa_op_iter op_iter;
355 imm_use_iterator imm_iter;
356 use_operand_p use_p;
357 def_operand_p def_p;
358
359 *relevant = vect_unused_in_scope;
360 *live_p = false;
361
362 /* cond stmt other than loop exit cond. */
363 gimple *stmt = STMT_VINFO_STMT (stmt_info);
364 if (is_ctrl_stmt (stmt)
365 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
366 && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
367 *relevant = vect_used_in_scope;
368
369 /* changing memory. */
370 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
371 if (gimple_vdef (stmt_info->stmt)
372 && !gimple_clobber_p (stmt_info->stmt))
373 {
374 if (dump_enabled_p ())
375 dump_printf_loc (MSG_NOTE, vect_location,
376 "vec_stmt_relevant_p: stmt has vdefs.\n");
377 *relevant = vect_used_in_scope;
378 }
379
380 /* uses outside the loop. */
381 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
382 {
383 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
384 {
385 basic_block bb = gimple_bb (USE_STMT (use_p));
386 if (!flow_bb_inside_loop_p (loop, bb))
387 {
388 if (is_gimple_debug (USE_STMT (use_p)))
389 continue;
390
391 if (dump_enabled_p ())
392 dump_printf_loc (MSG_NOTE, vect_location,
393 "vec_stmt_relevant_p: used out of loop.\n");
394
395 /* We expect all such uses to be in the loop exit phis
396 (because of loop-closed SSA form). */
397 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
398
399 *live_p = true;
400 }
401 }
402 }
403
404 /* Check if it's an induction and the loop has multiple exits. In this case
405 there will be a use after peeling which is needed for the alternate exit. */
406 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
407 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
408 {
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE, vect_location,
411 "vec_stmt_relevant_p: induction forced for "
412 "early break.\n");
413 *live_p = true;
414
415 }
416
417 if (*live_p && *relevant == vect_unused_in_scope
418 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
419 {
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_NOTE, vect_location,
422 "vec_stmt_relevant_p: stmt live but not relevant.\n");
423 *relevant = vect_used_only_live;
424 }
425
426 return (*live_p || *relevant);
427 }
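
/* An illustrative (hypothetical) example of the checks above:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- has a vdef: relevant (vect_used_in_scope)
         last = b[i];       <-- value used after the loop: live_p
       }
     ... = last;

   The store is relevant because it changes memory; the scalar copy is
   live because its result is used outside the loop, through the exit
   PHI required by loop-closed SSA form.  */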
428
429
430 /* Function exist_non_indexing_operands_for_use_p
431
432 USE is one of the uses attached to STMT_INFO. Check if USE is
433 used in STMT_INFO for anything other than indexing an array. */
434
435 static bool
436 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
437 {
438 tree operand;
439
440 /* USE corresponds to some operand in STMT. If there is no data
441 reference in STMT, then any operand that corresponds to USE
442 is not indexing an array. */
443 if (!STMT_VINFO_DATA_REF (stmt_info))
444 return true;
445
446 /* STMT has a data_ref. FORNOW this means that it's of one of
447 the following forms:
448 -1- ARRAY_REF = var
449 -2- var = ARRAY_REF
450 (This should have been verified in analyze_data_refs).
451
452 'var' in the second case corresponds to a def, not a use,
453 so USE cannot correspond to any operands that are not used
454 for array indexing.
455
456 Therefore, all we need to check is if STMT falls into the
457 first case, and whether var corresponds to USE. */
458
459 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
460 if (!assign || !gimple_assign_copy_p (assign))
461 {
462 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
463 if (call && gimple_call_internal_p (call))
464 {
465 internal_fn ifn = gimple_call_internal_fn (call);
466 int mask_index = internal_fn_mask_index (ifn);
467 if (mask_index >= 0
468 && use == gimple_call_arg (call, mask_index))
469 return true;
470 int stored_value_index = internal_fn_stored_value_index (ifn);
471 if (stored_value_index >= 0
472 && use == gimple_call_arg (call, stored_value_index))
473 return true;
474 if (internal_gather_scatter_fn_p (ifn)
475 && use == gimple_call_arg (call, 1))
476 return true;
477 }
478 return false;
479 }
480
481 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
482 return false;
483 operand = gimple_assign_rhs1 (assign);
484 if (TREE_CODE (operand) != SSA_NAME)
485 return false;
486
487 if (operand == use)
488 return true;
489
490 return false;
491 }
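
/* For example (hypothetical), in the store "a[i_1] = x_2" the use of i_1
   only feeds the address computation, so the function above returns false
   for i_1 and true for x_2; the statement defining i_1 therefore does not
   become relevant through this use.  */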
492
493
494 /*
495 Function process_use.
496
497 Inputs:
498 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
499 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
500 that defined USE. This is done by calling mark_relevant and passing it
501 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
502 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
503 be performed.
504
505 Outputs:
506 Generally, LIVE_P and RELEVANT are used to define the liveness and
507 relevance info of the DEF_STMT of this USE:
508 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
509 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
510 Exceptions:
511 - case 1: If USE is used only for address computations (e.g. array indexing),
512 which does not need to be directly vectorized, then the liveness/relevance
513 of the respective DEF_STMT is left unchanged.
514 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
515 we skip DEF_STMT because it has already been processed.
516 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
517 "relevant" will be modified accordingly.
518
519 Return true if everything is as expected. Return false otherwise. */
520
521 static opt_result
522 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
523 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
524 bool force)
525 {
526 stmt_vec_info dstmt_vinfo;
527 enum vect_def_type dt;
528
529 /* case 1: we are only interested in uses that need to be vectorized. Uses
530 that are used for address computation are not considered relevant. */
531 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
532 return opt_result::success ();
533
534 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
535 return opt_result::failure_at (stmt_vinfo->stmt,
536 "not vectorized:"
537 " unsupported use in stmt.\n");
538
539 if (!dstmt_vinfo)
540 return opt_result::success ();
541
542 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
543 basic_block bb = gimple_bb (stmt_vinfo->stmt);
544
545 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
546 We have to force the stmt live since the epilogue loop needs it to
547 continue computing the reduction. */
548 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
549 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
550 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
551 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
552 && bb->loop_father == def_bb->loop_father)
553 {
554 if (dump_enabled_p ())
555 dump_printf_loc (MSG_NOTE, vect_location,
556 "reduc-stmt defining reduc-phi in the same nest.\n");
557 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
558 return opt_result::success ();
559 }
560
561 /* case 3a: outer-loop stmt defining an inner-loop stmt:
562 outer-loop-header-bb:
563 d = dstmt_vinfo
564 inner-loop:
565 stmt # use (d)
566 outer-loop-tail-bb:
567 ... */
568 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
569 {
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location,
572 "outer-loop def-stmt defining inner-loop stmt.\n");
573
574 switch (relevant)
575 {
576 case vect_unused_in_scope:
577 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
578 vect_used_in_scope : vect_unused_in_scope;
579 break;
580
581 case vect_used_in_outer_by_reduction:
582 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
583 relevant = vect_used_by_reduction;
584 break;
585
586 case vect_used_in_outer:
587 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
588 relevant = vect_used_in_scope;
589 break;
590
591 case vect_used_in_scope:
592 break;
593
594 default:
595 gcc_unreachable ();
596 }
597 }
598
599 /* case 3b: inner-loop stmt defining an outer-loop stmt:
600 outer-loop-header-bb:
601 ...
602 inner-loop:
603 d = dstmt_vinfo
604 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
605 stmt # use (d) */
606 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
607 {
608 if (dump_enabled_p ())
609 dump_printf_loc (MSG_NOTE, vect_location,
610 "inner-loop def-stmt defining outer-loop stmt.\n");
611
612 switch (relevant)
613 {
614 case vect_unused_in_scope:
615 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
616 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
617 vect_used_in_outer_by_reduction : vect_unused_in_scope;
618 break;
619
620 case vect_used_by_reduction:
621 case vect_used_only_live:
622 relevant = vect_used_in_outer_by_reduction;
623 break;
624
625 case vect_used_in_scope:
626 relevant = vect_used_in_outer;
627 break;
628
629 default:
630 gcc_unreachable ();
631 }
632 }
633 /* We are also not interested in uses on loop PHI backedges that are
634 inductions. Otherwise we'll needlessly vectorize the IV increment
635 and cause hybrid SLP for SLP inductions. Unless the PHI is live
636 of course. */
637 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
638 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
639 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
640 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
641 loop_latch_edge (bb->loop_father))
642 == use))
643 {
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE, vect_location,
646 "induction value on backedge.\n");
647 return opt_result::success ();
648 }
649
650
651 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
652 return opt_result::success ();
653 }
654
655
656 /* Function vect_mark_stmts_to_be_vectorized.
657
658 Not all stmts in the loop need to be vectorized. For example:
659
660 for i...
661 for j...
662 1. T0 = i + j
663 2. T1 = a[T0]
664
665 3. j = j + 1
666
667 Stmt 1 and 3 do not need to be vectorized, because loop control and
668 addressing of vectorized data-refs are handled differently.
669
670 This pass detects such stmts. */
671
672 opt_result
673 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
674 {
675 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
676 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
677 unsigned int nbbs = loop->num_nodes;
678 gimple_stmt_iterator si;
679 unsigned int i;
680 basic_block bb;
681 bool live_p;
682 enum vect_relevant relevant;
683
684 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
685
686 auto_vec<stmt_vec_info, 64> worklist;
687
688 /* 1. Init worklist. */
689 for (i = 0; i < nbbs; i++)
690 {
691 bb = bbs[i];
692 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
693 {
694 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
697 phi_info->stmt);
698
699 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
700 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
701 }
702 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
703 {
704 if (is_gimple_debug (gsi_stmt (si)))
705 continue;
706 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "init: stmt relevant? %G", stmt_info->stmt);
710
711 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
712 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
713 }
714 }
715
716 /* 2. Process_worklist */
717 while (worklist.length () > 0)
718 {
719 use_operand_p use_p;
720 ssa_op_iter iter;
721
722 stmt_vec_info stmt_vinfo = worklist.pop ();
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "worklist: examine stmt: %G", stmt_vinfo->stmt);
726
727 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
728 (DEF_STMT) as relevant/irrelevant according to the relevance property
729 of STMT. */
730 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
731
732 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
733 propagated as is to the DEF_STMTs of its USEs.
734
735 One exception is when STMT has been identified as defining a reduction
736 variable; in this case we set the relevance to vect_used_by_reduction.
737 This is because we distinguish between two kinds of relevant stmts -
738 those that are used by a reduction computation, and those that are
739 (also) used by a regular computation. This allows us later on to
740 identify stmts that are used solely by a reduction, and therefore the
741 order of the results that they produce does not have to be kept. */
742
743 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
744 {
745 case vect_reduction_def:
746 gcc_assert (relevant != vect_unused_in_scope);
747 if (relevant != vect_unused_in_scope
748 && relevant != vect_used_in_scope
749 && relevant != vect_used_by_reduction
750 && relevant != vect_used_only_live)
751 return opt_result::failure_at
752 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
753 break;
754
755 case vect_nested_cycle:
756 if (relevant != vect_unused_in_scope
757 && relevant != vect_used_in_outer_by_reduction
758 && relevant != vect_used_in_outer)
759 return opt_result::failure_at
760 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
761 break;
762
763 case vect_double_reduction_def:
764 if (relevant != vect_unused_in_scope
765 && relevant != vect_used_by_reduction
766 && relevant != vect_used_only_live)
767 return opt_result::failure_at
768 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
769 break;
770
771 default:
772 break;
773 }
774
775 if (is_pattern_stmt_p (stmt_vinfo))
776 {
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
781 {
782 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
783 tree op = gimple_assign_rhs1 (assign);
784
785 i = 1;
786 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
787 {
788 opt_result res
789 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
790 loop_vinfo, relevant, &worklist, false);
791 if (!res)
792 return res;
793 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
794 loop_vinfo, relevant, &worklist, false);
795 if (!res)
796 return res;
797 i = 2;
798 }
799 for (; i < gimple_num_ops (assign); i++)
800 {
801 op = gimple_op (assign, i);
802 if (TREE_CODE (op) == SSA_NAME)
803 {
804 opt_result res
805 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
806 &worklist, false);
807 if (!res)
808 return res;
809 }
810 }
811 }
812 else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
813 {
814 tree_code rhs_code = gimple_cond_code (cond);
815 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
816 opt_result res
817 = process_use (stmt_vinfo, gimple_cond_lhs (cond),
818 loop_vinfo, relevant, &worklist, false);
819 if (!res)
820 return res;
821 res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
822 loop_vinfo, relevant, &worklist, false);
823 if (!res)
824 return res;
825 }
826 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
827 {
828 for (i = 0; i < gimple_call_num_args (call); i++)
829 {
830 tree arg = gimple_call_arg (call, i);
831 opt_result res
832 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
833 &worklist, false);
834 if (!res)
835 return res;
836 }
837 }
838 else
839 gcc_unreachable ();
840 }
841 else
842 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
843 {
844 tree op = USE_FROM_PTR (use_p);
845 opt_result res
846 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
847 &worklist, false);
848 if (!res)
849 return res;
850 }
851
852 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
853 {
854 gather_scatter_info gs_info;
855 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
856 gcc_unreachable ();
857 opt_result res
858 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
859 &worklist, true);
860 if (!res)
861 {
862 if (fatal)
863 *fatal = false;
864 return res;
865 }
866 }
867 } /* while worklist */
868
869 return opt_result::success ();
870 }
871
872 /* Function vect_model_simple_cost.
873
874 Models cost for simple operations, i.e. those that only emit ncopies of a
875 single op. Right now, this does not account for multiple insns that could
876 be generated for the single vector op. We will handle that shortly. */
877
878 static void
879 vect_model_simple_cost (vec_info *,
880 stmt_vec_info stmt_info, int ncopies,
881 enum vect_def_type *dt,
882 int ndts,
883 slp_tree node,
884 stmt_vector_for_cost *cost_vec,
885 vect_cost_for_stmt kind = vector_stmt)
886 {
887 int inside_cost = 0, prologue_cost = 0;
888
889 gcc_assert (cost_vec != NULL);
890
891 /* ??? Somehow we need to fix this at the callers. */
892 if (node)
893 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
894
895 if (!node)
896 /* Cost the "broadcast" of a scalar operand into a vector operand.
897 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
898 cost model. */
899 for (int i = 0; i < ndts; i++)
900 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
901 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
902 stmt_info, 0, vect_prologue);
903
904 /* Pass the inside-of-loop statements to the target-specific cost model. */
905 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
906 stmt_info, 0, vect_body);
907
908 if (dump_enabled_p ())
909 dump_printf_loc (MSG_NOTE, vect_location,
910 "vect_model_simple_cost: inside_cost = %d, "
911 "prologue_cost = %d .\n", inside_cost, prologue_cost);
912 }
913
914
915 /* Model cost for type demotion and promotion operations. PWR is
916 normally zero for single-step promotions and demotions. It will be
917 one if two-step promotion/demotion is required, and so on. NCOPIES
918 is the number of vector results (and thus number of instructions)
919 for the narrowest end of the operation chain. Each additional
920 step doubles the number of instructions required. If WIDEN_ARITH
921 is true the stmt is doing widening arithmetic. */
922
923 static void
924 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
925 enum vect_def_type *dt,
926 unsigned int ncopies, int pwr,
927 stmt_vector_for_cost *cost_vec,
928 bool widen_arith)
929 {
930 int i;
931 int inside_cost = 0, prologue_cost = 0;
932
933 for (i = 0; i < pwr + 1; i++)
934 {
935 inside_cost += record_stmt_cost (cost_vec, ncopies,
936 widen_arith
937 ? vector_stmt : vec_promote_demote,
938 stmt_info, 0, vect_body);
939 ncopies *= 2;
940 }
941
942 /* FORNOW: Assuming a maximum of 2 args per stmt. */
943 for (i = 0; i < 2; i++)
944 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
945 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
946 stmt_info, 0, vect_prologue);
947
948 if (dump_enabled_p ())
949 dump_printf_loc (MSG_NOTE, vect_location,
950 "vect_model_promotion_demotion_cost: inside_cost = %d, "
951 "prologue_cost = %d .\n", inside_cost, prologue_cost);
952 }
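
/* A worked example (hypothetical values): with NCOPIES = 2, PWR = 1 and
   WIDEN_ARITH false, the loop above records 2 + 4 = 6 vec_promote_demote
   statements in the loop body, doubling the count at each step of the
   promotion/demotion chain.  */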
953
954 /* Returns true if the current function returns DECL. */
955
956 static bool
957 cfun_returns (tree decl)
958 {
959 edge_iterator ei;
960 edge e;
961 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
962 {
963 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
964 if (!ret)
965 continue;
966 if (gimple_return_retval (ret) == decl)
967 return true;
968 /* We often end up with an aggregate copy to the result decl,
969 handle that case as well. First skip intermediate clobbers
970 though. */
971 gimple *def = ret;
972 do
973 {
974 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
975 }
976 while (gimple_clobber_p (def));
977 if (is_a <gassign *> (def)
978 && gimple_assign_lhs (def) == gimple_return_retval (ret)
979 && gimple_assign_rhs1 (def) == decl)
980 return true;
981 }
982 return false;
983 }
984
985 /* Calculate cost of DR's memory access. */
986 void
987 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
988 dr_alignment_support alignment_support_scheme,
989 int misalignment,
990 unsigned int *inside_cost,
991 stmt_vector_for_cost *body_cost_vec)
992 {
993 switch (alignment_support_scheme)
994 {
995 case dr_aligned:
996 {
997 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
998 vector_store, stmt_info, 0,
999 vect_body);
1000
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: aligned.\n");
1004 break;
1005 }
1006
1007 case dr_unaligned_supported:
1008 {
1009 /* Here, we assign an additional cost for the unaligned store. */
1010 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1011 unaligned_store, stmt_info,
1012 misalignment, vect_body);
1013 if (dump_enabled_p ())
1014 dump_printf_loc (MSG_NOTE, vect_location,
1015 "vect_model_store_cost: unaligned supported by "
1016 "hardware.\n");
1017 break;
1018 }
1019
1020 case dr_unaligned_unsupported:
1021 {
1022 *inside_cost = VECT_MAX_COST;
1023
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1026 "vect_model_store_cost: unsupported access.\n");
1027 break;
1028 }
1029
1030 default:
1031 gcc_unreachable ();
1032 }
1033 }
1034
1035 /* Calculate cost of DR's memory access. */
1036 void
1037 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1038 dr_alignment_support alignment_support_scheme,
1039 int misalignment,
1040 bool add_realign_cost, unsigned int *inside_cost,
1041 unsigned int *prologue_cost,
1042 stmt_vector_for_cost *prologue_cost_vec,
1043 stmt_vector_for_cost *body_cost_vec,
1044 bool record_prologue_costs)
1045 {
1046 switch (alignment_support_scheme)
1047 {
1048 case dr_aligned:
1049 {
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1051 stmt_info, 0, vect_body);
1052
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_load_cost: aligned.\n");
1056
1057 break;
1058 }
1059 case dr_unaligned_supported:
1060 {
1061 /* Here, we assign an additional cost for the unaligned load. */
1062 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1063 unaligned_load, stmt_info,
1064 misalignment, vect_body);
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_NOTE, vect_location,
1068 "vect_model_load_cost: unaligned supported by "
1069 "hardware.\n");
1070
1071 break;
1072 }
1073 case dr_explicit_realign:
1074 {
1075 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1076 vector_load, stmt_info, 0, vect_body);
1077 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1078 vec_perm, stmt_info, 0, vect_body);
1079
1080 /* FIXME: If the misalignment remains fixed across the iterations of
1081 the containing loop, the following cost should be added to the
1082 prologue costs. */
1083 if (targetm.vectorize.builtin_mask_for_load)
1084 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1085 stmt_info, 0, vect_body);
1086
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: explicit realign\n");
1090
1091 break;
1092 }
1093 case dr_explicit_realign_optimized:
1094 {
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: unaligned software "
1098 "pipelined.\n");
1099
1100 /* Unaligned software pipeline has a load of an address, an initial
1101 load, and possibly a mask operation to "prime" the loop. However,
1102 if this is an access in a group of loads, which provide grouped
1103 access, then the above cost should only be considered for one
1104 access in the group. Inside the loop, there is a load op
1105 and a realignment op. */
1106
1107 if (add_realign_cost && record_prologue_costs)
1108 {
1109 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1110 vector_stmt, stmt_info,
1111 0, vect_prologue);
1112 if (targetm.vectorize.builtin_mask_for_load)
1113 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1114 vector_stmt, stmt_info,
1115 0, vect_prologue);
1116 }
1117
1118 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1119 stmt_info, 0, vect_body);
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1121 stmt_info, 0, vect_body);
1122
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: explicit realign optimized"
1126 "\n");
1127
1128 break;
1129 }
1130
1131 case dr_unaligned_unsupported:
1132 {
1133 *inside_cost = VECT_MAX_COST;
1134
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137 "vect_model_load_cost: unsupported access.\n");
1138 break;
1139 }
1140
1141 default:
1142 gcc_unreachable ();
1143 }
1144 }
1145
1146 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1147 the loop preheader for the vectorized stmt STMT_VINFO. */
1148
1149 static void
1150 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1151 gimple_stmt_iterator *gsi)
1152 {
1153 if (gsi)
1154 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1155 else
1156 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1157
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "created new init_stmt: %G", new_stmt);
1161 }
1162
1163 /* Function vect_init_vector.
1164
1165 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1166 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1167 vector type a vector with all elements equal to VAL is created first.
1168 Place the initialization at GSI if it is not NULL. Otherwise, place the
1169 initialization at the loop preheader.
1170 Return the DEF of INIT_STMT.
1171 It will be used in the vectorization of STMT_INFO. */
1172
1173 tree
1174 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1175 gimple_stmt_iterator *gsi)
1176 {
1177 gimple *init_stmt;
1178 tree new_temp;
1179
1180 /* We abuse this function to push something to an SSA name with initial 'val'. */
1181 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1182 {
1183 gcc_assert (VECTOR_TYPE_P (type));
1184 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1185 {
1186 /* Scalar boolean value should be transformed into
1187 all zeros or all ones value before building a vector. */
1188 if (VECTOR_BOOLEAN_TYPE_P (type))
1189 {
1190 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1191 tree false_val = build_zero_cst (TREE_TYPE (type));
1192
1193 if (CONSTANT_CLASS_P (val))
1194 val = integer_zerop (val) ? false_val : true_val;
1195 else
1196 {
1197 new_temp = make_ssa_name (TREE_TYPE (type));
1198 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1199 val, true_val, false_val);
1200 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1201 val = new_temp;
1202 }
1203 }
1204 else
1205 {
1206 gimple_seq stmts = NULL;
1207 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1208 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1209 TREE_TYPE (type), val);
1210 else
1211 /* ??? Condition vectorization expects us to do
1212 promotion of invariant/external defs. */
1213 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1214 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1215 !gsi_end_p (gsi2); )
1216 {
1217 init_stmt = gsi_stmt (gsi2);
1218 gsi_remove (&gsi2, false);
1219 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1220 }
1221 }
1222 }
1223 val = build_vector_from_val (type, val);
1224 }
1225
1226 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1227 init_stmt = gimple_build_assign (new_temp, val);
1228 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1229 return new_temp;
1230 }
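
/* For illustration (hypothetical values): with TYPE = vector(4) int,
   VAL = 5 and GSI == NULL, the code above places roughly

     vect_cst__1 = { 5, 5, 5, 5 };

   on the loop preheader edge and returns vect_cst__1 for use by the
   vectorized statements.  */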
1231
1232
1233 /* Function vect_get_vec_defs_for_operand.
1234
1235 OP is an operand in STMT_VINFO. This function returns a vector of
1236 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1237
1238 In the case that OP is an SSA_NAME which is defined in the loop, then
1239 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1240
1241 In case OP is an invariant or constant, a new stmt that creates a vector def
1242 needs to be introduced. VECTYPE may be used to specify a required type for
1243 vector invariant. */
1244
1245 void
1246 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1247 unsigned ncopies,
1248 tree op, vec<tree> *vec_oprnds, tree vectype)
1249 {
1250 gimple *def_stmt;
1251 enum vect_def_type dt;
1252 bool is_simple_use;
1253 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_get_vec_defs_for_operand: %T\n", op);
1258
1259 stmt_vec_info def_stmt_info;
1260 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1261 &def_stmt_info, &def_stmt);
1262 gcc_assert (is_simple_use);
1263 if (def_stmt && dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1265
1266 vec_oprnds->create (ncopies);
1267 if (dt == vect_constant_def || dt == vect_external_def)
1268 {
1269 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1270 tree vector_type;
1271
1272 if (vectype)
1273 vector_type = vectype;
1274 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1275 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1276 vector_type = truth_type_for (stmt_vectype);
1277 else
1278 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1279
1280 gcc_assert (vector_type);
1281 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1282 while (ncopies--)
1283 vec_oprnds->quick_push (vop);
1284 }
1285 else
1286 {
1287 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1288 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1289 for (unsigned i = 0; i < ncopies; ++i)
1290 vec_oprnds->quick_push (gimple_get_lhs
1291 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1292 }
1293 }
1294
1295
1296 /* Get vectorized definitions for OP0 and OP1. */
1297
1298 void
1299 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1300 unsigned ncopies,
1301 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1302 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1303 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1304 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1305 {
1306 if (slp_node)
1307 {
1308 if (op0)
1309 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1310 if (op1)
1311 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1312 if (op2)
1313 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1314 if (op3)
1315 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1316 }
1317 else
1318 {
1319 if (op0)
1320 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1321 op0, vec_oprnds0, vectype0);
1322 if (op1)
1323 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1324 op1, vec_oprnds1, vectype1);
1325 if (op2)
1326 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1327 op2, vec_oprnds2, vectype2);
1328 if (op3)
1329 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1330 op3, vec_oprnds3, vectype3);
1331 }
1332 }
1333
1334 void
1335 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1336 unsigned ncopies,
1337 tree op0, vec<tree> *vec_oprnds0,
1338 tree op1, vec<tree> *vec_oprnds1,
1339 tree op2, vec<tree> *vec_oprnds2,
1340 tree op3, vec<tree> *vec_oprnds3)
1341 {
1342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1343 op0, NULL_TREE, vec_oprnds0,
1344 op1, NULL_TREE, vec_oprnds1,
1345 op2, NULL_TREE, vec_oprnds2,
1346 op3, NULL_TREE, vec_oprnds3);
1347 }
1348
1349 /* Helper function called by vect_finish_replace_stmt and
1350 vect_finish_stmt_generation. Set the location of the new
1351 statement and, if it can throw, add it to STMT_INFO's EH region. */
1352
1353 static void
1354 vect_finish_stmt_generation_1 (vec_info *,
1355 stmt_vec_info stmt_info, gimple *vec_stmt)
1356 {
1357 if (dump_enabled_p ())
1358 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1359
1360 if (stmt_info)
1361 {
1362 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1363
1364 /* While EH edges will generally prevent vectorization, stmt might
1365 e.g. be in a must-not-throw region. Ensure newly created stmts
1366 that could throw are part of the same region. */
1367 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1368 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1369 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1370 }
1371 else
1372 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1373 }
1374
1375 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1376 which sets the same scalar result as STMT_INFO did. Give VEC_STMT the
1377 same location and EH region as the statement it replaces. */
1378
1379 void
1380 vect_finish_replace_stmt (vec_info *vinfo,
1381 stmt_vec_info stmt_info, gimple *vec_stmt)
1382 {
1383 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1384 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1385
1386 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1387 gsi_replace (&gsi, vec_stmt, true);
1388
1389 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1390 }
1391
1392 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1393 before *GSI, updating virtual operands, location and EH info as needed. */
1394
1395 void
1396 vect_finish_stmt_generation (vec_info *vinfo,
1397 stmt_vec_info stmt_info, gimple *vec_stmt,
1398 gimple_stmt_iterator *gsi)
1399 {
1400 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1401
1402 if (!gsi_end_p (*gsi)
1403 && gimple_has_mem_ops (vec_stmt))
1404 {
1405 gimple *at_stmt = gsi_stmt (*gsi);
1406 tree vuse = gimple_vuse (at_stmt);
1407 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1408 {
1409 tree vdef = gimple_vdef (at_stmt);
1410 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1411 gimple_set_modified (vec_stmt, true);
1412 /* If we have an SSA vuse and insert a store, update virtual
1413 SSA form to avoid triggering the renamer. Do so only
1414 if we can easily see all uses - which is what almost always
1415 happens with the way vectorized stmts are inserted. */
1416 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1417 && ((is_gimple_assign (vec_stmt)
1418 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1419 || (is_gimple_call (vec_stmt)
1420 && (!(gimple_call_flags (vec_stmt)
1421 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1422 || (gimple_call_lhs (vec_stmt)
1423 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1424 {
1425 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1426 gimple_set_vdef (vec_stmt, new_vdef);
1427 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1428 }
1429 }
1430 }
1431 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1432 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1433 }
1434
1435 /* We want to vectorize a call to combined function CFN with function
1436 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1437 as the types of all inputs. Check whether this is possible using
1438 an internal function, returning its code if so or IFN_LAST if not. */
1439
1440 static internal_fn
1441 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1442 tree vectype_out, tree vectype_in)
1443 {
1444 internal_fn ifn;
1445 if (internal_fn_p (cfn))
1446 ifn = as_internal_fn (cfn);
1447 else
1448 ifn = associated_internal_fn (fndecl);
1449 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1450 {
1451 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1452 if (info.vectorizable)
1453 {
1454 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1455 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1456 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1457
1458 /* The type sizes of vectype_in and vectype_out must be exactly the
1459 same when vectype_out isn't participating in the optab query, while
1460 there is no size restriction when vectype_out is part of the
1461 optab query. */
1462 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1463 return IFN_LAST;
1464
1465 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1466 OPTIMIZE_FOR_SPEED))
1467 return ifn;
1468 }
1469 }
1470 return IFN_LAST;
1471 }
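
/* For example, a scalar call to sqrtf is associated with IFN_SQRT, so with
   vectype_out == vectype_in == vector(4) float the function above simply
   asks whether IFN_SQRT is directly supported for that vector type and
   returns IFN_SQRT if so, IFN_LAST otherwise.  */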
1472
1473
1474 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1475 gimple_stmt_iterator *);
1476
1477 /* Check whether a load or store statement in the loop described by
1478 LOOP_VINFO is possible in a loop using partial vectors. This is
1479 testing whether the vectorizer pass has the appropriate support,
1480 as well as whether the target does.
1481
1482 VLS_TYPE says whether the statement is a load or store and VECTYPE
1483 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1484 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1485 says how the load or store is going to be implemented and GROUP_SIZE
1486 is the number of load or store statements in the containing group.
1487 If the access is a gather load or scatter store, GS_INFO describes
1488 its arguments. If the load or store is conditional, SCALAR_MASK is the
1489 condition under which it occurs.
1490
1491 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1492 vectors is not supported, otherwise record the required rgroup control
1493 types. */
1494
1495 static void
1496 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1497 slp_tree slp_node,
1498 vec_load_store_type vls_type,
1499 int group_size,
1500 vect_memory_access_type
1501 memory_access_type,
1502 gather_scatter_info *gs_info,
1503 tree scalar_mask)
1504 {
1505 /* Invariant loads need no special support. */
1506 if (memory_access_type == VMAT_INVARIANT)
1507 return;
1508
1509 unsigned int nvectors;
1510 if (slp_node)
1511 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1512 else
1513 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1514
1515 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1516 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1517 machine_mode vecmode = TYPE_MODE (vectype);
1518 bool is_load = (vls_type == VLS_LOAD);
1519 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1520 {
1521 internal_fn ifn
1522 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1523 : vect_store_lanes_supported (vectype, group_size, true));
1524 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1525 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1526 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1530 {
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " load/store-lanes instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1537 }
1538 return;
1539 }
1540
1541 if (memory_access_type == VMAT_GATHER_SCATTER)
1542 {
1543 internal_fn ifn = (is_load
1544 ? IFN_MASK_GATHER_LOAD
1545 : IFN_MASK_SCATTER_STORE);
1546 internal_fn len_ifn = (is_load
1547 ? IFN_MASK_LEN_GATHER_LOAD
1548 : IFN_MASK_LEN_SCATTER_STORE);
1549 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1550 gs_info->memory_type,
1551 gs_info->offset_vectype,
1552 gs_info->scale))
1553 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1554 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1555 gs_info->memory_type,
1556 gs_info->offset_vectype,
1557 gs_info->scale))
1558 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1559 scalar_mask);
1560 else
1561 {
1562 if (dump_enabled_p ())
1563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1564 "can't operate on partial vectors because"
1565 " the target doesn't have an appropriate"
1566 " gather load or scatter store instruction.\n");
1567 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1568 }
1569 return;
1570 }
1571
1572 if (memory_access_type != VMAT_CONTIGUOUS
1573 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1574 {
1575 /* Element X of the data must come from iteration i * VF + X of the
1576 scalar loop. We need more work to support other mappings. */
1577 if (dump_enabled_p ())
1578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1579 "can't operate on partial vectors because an"
1580 " access isn't contiguous.\n");
1581 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1582 return;
1583 }
1584
1585 if (!VECTOR_MODE_P (vecmode))
1586 {
1587 if (dump_enabled_p ())
1588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1589 "can't operate on partial vectors when emulating"
1590 " vector operations.\n");
1591 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1592 return;
1593 }
1594
1595 /* We might load more scalars than we need for permuting SLP loads.
1596 We checked in get_group_load_store_type that the extra elements
1597 don't leak into a new vector. */
1598 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1599 {
1600 unsigned int nvectors;
1601 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1602 return nvectors;
1603 gcc_unreachable ();
1604 };
1605
1606 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1607 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1608 machine_mode mask_mode;
1609 machine_mode vmode;
1610 bool using_partial_vectors_p = false;
1611 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1612 {
1613 nvectors = group_memory_nvectors (group_size * vf, nunits);
1614 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1615 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1616 using_partial_vectors_p = true;
1617 }
1618 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1619 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1620 {
1621 nvectors = group_memory_nvectors (group_size * vf, nunits);
1622 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1623 using_partial_vectors_p = true;
1624 }
1625
1626 if (!using_partial_vectors_p)
1627 {
1628 if (dump_enabled_p ())
1629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1630 "can't operate on partial vectors because the"
1631 " target doesn't have the appropriate partial"
1632 " vectorization load or store.\n");
1633 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1634 }
1635 }
1636
1637 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1638 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1639 that needs to be applied to all loads and stores in a vectorized loop.
1640 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1641 otherwise return VEC_MASK & LOOP_MASK.
1642
1643 MASK_TYPE is the type of both masks. If new statements are needed,
1644 insert them before GSI. */
1645
1646 static tree
1647 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1648 tree vec_mask, gimple_stmt_iterator *gsi)
1649 {
1650 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1651 if (!loop_mask)
1652 return vec_mask;
1653
1654 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1655
1656 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1657 return vec_mask;
1658
1659 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1660 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1661 vec_mask, loop_mask);
1662
1663 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1664 return and_res;
1665 }
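
/* For illustration, when both masks are present the statement emitted
   above looks roughly like

     vec_mask_and_3 = vec_mask_2 & loop_mask_1;

   and the caller uses vec_mask_and_3 for its masked load or store; the
   set lookup skips the AND when VEC_MASK is already known to incorporate
   LOOP_MASK.  */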
1666
1667 /* Determine whether we can use a gather load or scatter store to vectorize
1668 strided load or store STMT_INFO by truncating the current offset to a
1669 smaller width. We need to be able to construct an offset vector:
1670
1671 { 0, X, X*2, X*3, ... }
1672
1673 without loss of precision, where X is STMT_INFO's DR_STEP.
1674
1675 Return true if this is possible, describing the gather load or scatter
1676 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1677
1678 static bool
1679 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1680 loop_vec_info loop_vinfo, bool masked_p,
1681 gather_scatter_info *gs_info)
1682 {
1683 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1684 data_reference *dr = dr_info->dr;
1685 tree step = DR_STEP (dr);
1686 if (TREE_CODE (step) != INTEGER_CST)
1687 {
1688 /* ??? Perhaps we could use range information here? */
1689 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_NOTE, vect_location,
1691 "cannot truncate variable step.\n");
1692 return false;
1693 }
1694
1695 /* Get the number of bits in an element. */
1696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1697 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1698 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1699
1700 /* Set COUNT to an upper limit on the number of elements minus 1.
1701 Start with the maximum vectorization factor. */
1702 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1703
1704 /* Try lowering COUNT to the number of scalar latch iterations. */
1705 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1706 widest_int max_iters;
1707 if (max_loop_iterations (loop, &max_iters)
1708 && max_iters < count)
1709 count = max_iters.to_shwi ();
1710
1711 /* Try scales of 1 and the element size. */
1712 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1713 wi::overflow_type overflow = wi::OVF_NONE;
1714 for (int i = 0; i < 2; ++i)
1715 {
1716 int scale = scales[i];
1717 widest_int factor;
1718 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1719 continue;
1720
1721 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1722 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1723 if (overflow)
1724 continue;
1725 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1726 unsigned int min_offset_bits = wi::min_precision (range, sign);
1727
1728 /* Find the narrowest viable offset type. */
1729 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1730 tree offset_type = build_nonstandard_integer_type (offset_bits,
1731 sign == UNSIGNED);
1732
1733 /* See whether the target supports the operation with an offset
1734 no narrower than OFFSET_TYPE. */
1735 tree memory_type = TREE_TYPE (DR_REF (dr));
1736 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1737 vectype, memory_type, offset_type, scale,
1738 &gs_info->ifn, &gs_info->offset_vectype)
1739 || gs_info->ifn == IFN_LAST)
1740 continue;
1741
1742 gs_info->decl = NULL_TREE;
1743 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1744 but we don't need to store that here. */
1745 gs_info->base = NULL_TREE;
1746 gs_info->element_type = TREE_TYPE (vectype);
1747 gs_info->offset = fold_convert (offset_type, step);
1748 gs_info->offset_dt = vect_constant_def;
1749 gs_info->scale = scale;
1750 gs_info->memory_type = memory_type;
1751 return true;
1752 }
1753
1754 if (overflow && dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "truncating gather/scatter offset to %d bits"
1757 " might change its value.\n", element_bits);
1758
1759 return false;
1760 }
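
/* Worked example (editor's illustration with hypothetical values):
   suppose DR_STEP is 4 bytes, the element size is 4 bytes and the loop
   runs at most 100 scalar iterations, so COUNT is 99. Trying SCALE == 4
   gives FACTOR == 1, RANGE == 99 and MIN_OFFSET_BITS == 7, which rounds
   up to an 8-bit unsigned offset type. If the target supports a gather
   or scatter with 8-bit offsets and scale 4, GS_INFO then describes the
   offsets { 0, 1, 2, 3, ... } scaled by 4, which need less precision
   than the byte offsets { 0, 4, 8, 12, ... }. */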
1761
1762 /* Return true if we can use gather/scatter internal functions to
1763 vectorize STMT_INFO, which is a grouped or strided load or store.
1764 MASKED_P is true if load or store is conditional. When returning
1765 true, fill in GS_INFO with the information required to perform the
1766 operation. */
1767
1768 static bool
1769 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1770 loop_vec_info loop_vinfo, bool masked_p,
1771 gather_scatter_info *gs_info)
1772 {
1773 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1774 || gs_info->ifn == IFN_LAST)
1775 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1776 masked_p, gs_info);
1777
1778 tree old_offset_type = TREE_TYPE (gs_info->offset);
1779 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1780
1781 gcc_assert (TYPE_PRECISION (new_offset_type)
1782 >= TYPE_PRECISION (old_offset_type));
1783 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1784
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE, vect_location,
1787 "using gather/scatter for strided/grouped access,"
1788 " scale = %d\n", gs_info->scale);
1789
1790 return true;
1791 }
1792
1793 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1794 elements with a known constant step. Return -1 if that step
1795 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1796
1797 static int
1798 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1799 {
1800 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1801 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1802 size_zero_node);
1803 }
1804
1805 /* If the target supports a permute mask that reverses the elements in
1806 a vector of type VECTYPE, return that mask, otherwise return null. */
1807
1808 tree
1809 perm_mask_for_reverse (tree vectype)
1810 {
1811 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1812
1813 /* The encoding has a single stepped pattern. */
1814 vec_perm_builder sel (nunits, 1, 3);
1815 for (int i = 0; i < 3; ++i)
1816 sel.quick_push (nunits - 1 - i);
1817
1818 vec_perm_indices indices (sel, 1, nunits);
1819 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1820 indices))
1821 return NULL_TREE;
1822 return vect_gen_perm_mask_checked (vectype, indices);
1823 }
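
/* For example (editor's note): for V4SI the selector built above is
   { 3, 2, 1, 0 }. Only the three leading elements { NUNITS-1, NUNITS-2,
   NUNITS-3 } are encoded and the single stepped pattern extends the
   series, so the same builder also describes the reversal of
   variable-length vectors such as VNx4SI. */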
1824
1825 /* A subroutine of get_load_store_type, with a subset of the same
1826 arguments. Handle the case where STMT_INFO is a load or store that
1827 accesses consecutive elements with a negative step. Sets *POFFSET
1828 to the offset to be applied to the DR for the first access. */
1829
1830 static vect_memory_access_type
1831 get_negative_load_store_type (vec_info *vinfo,
1832 stmt_vec_info stmt_info, tree vectype,
1833 vec_load_store_type vls_type,
1834 unsigned int ncopies, poly_int64 *poffset)
1835 {
1836 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1837 dr_alignment_support alignment_support_scheme;
1838
1839 if (ncopies > 1)
1840 {
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "multiple types with negative step.\n");
1844 return VMAT_ELEMENTWISE;
1845 }
1846
1847 /* For backward-running DRs the first access in VECTYPE is actually
1848 N-1 elements before the address of the DR. */
1849 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1850 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1851
1852 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1853 alignment_support_scheme
1854 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1855 if (alignment_support_scheme != dr_aligned
1856 && alignment_support_scheme != dr_unaligned_supported)
1857 {
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "negative step but alignment required.\n");
1861 *poffset = 0;
1862 return VMAT_ELEMENTWISE;
1863 }
1864
1865 if (vls_type == VLS_STORE_INVARIANT)
1866 {
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE, vect_location,
1869 "negative step with invariant source;"
1870 " no permute needed.\n");
1871 return VMAT_CONTIGUOUS_DOWN;
1872 }
1873
1874 if (!perm_mask_for_reverse (vectype))
1875 {
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "negative step and reversing not supported.\n");
1879 *poffset = 0;
1880 return VMAT_ELEMENTWISE;
1881 }
1882
1883 return VMAT_CONTIGUOUS_REVERSE;
1884 }
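
/* Worked example (editor's illustration): for a V4SI access with a step
   of -4 bytes the first vector access covers the four elements ending at
   the DR address, so *POFFSET is set to (1 - 4) * 4 == -12 bytes. A load
   is then reversed with a VEC_PERM_EXPR built from perm_mask_for_reverse
   (VMAT_CONTIGUOUS_REVERSE), while an invariant store can skip the
   permute and use VMAT_CONTIGUOUS_DOWN. */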
1885
1886 /* STMT_INFO is either a masked or unconditional store. Return the value
1887 being stored. */
1888
1889 tree
1890 vect_get_store_rhs (stmt_vec_info stmt_info)
1891 {
1892 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1893 {
1894 gcc_assert (gimple_assign_single_p (assign));
1895 return gimple_assign_rhs1 (assign);
1896 }
1897 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1898 {
1899 internal_fn ifn = gimple_call_internal_fn (call);
1900 int index = internal_fn_stored_value_index (ifn);
1901 gcc_assert (index >= 0);
1902 return gimple_call_arg (call, index);
1903 }
1904 gcc_unreachable ();
1905 }
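
/* For example (editor's note): for a plain assignment "*p_1 = x_2" this
   returns x_2, and for an internal-function store such as
   .MASK_STORE (p_1, align, mask_3, x_2) the stored-value index picks
   x_2 out of the call arguments. */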
1906
1907 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1908
1909 This function returns a vector type which can be composed from NELTS
1910 pieces, whose type is recorded in PTYPE. VTYPE should be a vector type
1911 with the same vector size as the returned vector. It first checks whether
1912 the target supports a pieces-sized vector mode for the construction; if
1913 not, it tries a pieces-sized scalar mode instead. It returns NULL_TREE
1914 if no suitable composition can be found.
1915
1916 For example, for (vtype=V16QI, nelts=4), we can probably get:
1917 - V16QI with PTYPE V4QI.
1918 - V4SI with PTYPE SI.
1919 - NULL_TREE. */
1920
1921 static tree
1922 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1923 {
1924 gcc_assert (VECTOR_TYPE_P (vtype));
1925 gcc_assert (known_gt (nelts, 0U));
1926
1927 machine_mode vmode = TYPE_MODE (vtype);
1928 if (!VECTOR_MODE_P (vmode))
1929 return NULL_TREE;
1930
1931 /* When we are asked to compose the vector from its components, let
1932 that happen directly. */
1933 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1934 {
1935 *ptype = TREE_TYPE (vtype);
1936 return vtype;
1937 }
1938
1939 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1940 unsigned int pbsize;
1941 if (constant_multiple_p (vbsize, nelts, &pbsize))
1942 {
1943 /* First check if vec_init optab supports construction from
1944 vector pieces directly. */
1945 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1946 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1947 machine_mode rmode;
1948 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1949 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1950 != CODE_FOR_nothing))
1951 {
1952 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1953 return vtype;
1954 }
1955
1956 /* Otherwise check whether an integer type of the same piece size exists
1957 and whether the vec_init optab supports construction from it directly. */
1958 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1959 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1960 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1961 != CODE_FOR_nothing))
1962 {
1963 *ptype = build_nonstandard_integer_type (pbsize, 1);
1964 return build_vector_type (*ptype, nelts);
1965 }
1966 }
1967
1968 return NULL_TREE;
1969 }
1970
1971 /* A subroutine of get_load_store_type, with a subset of the same
1972 arguments. Handle the case where STMT_INFO is part of a grouped load
1973 or store.
1974
1975 For stores, the statements in the group are all consecutive
1976 and there is no gap at the end. For loads, the statements in the
1977 group might not be consecutive; there can be gaps between statements
1978 as well as at the end. */
1979
1980 static bool
1981 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1982 tree vectype, slp_tree slp_node,
1983 bool masked_p, vec_load_store_type vls_type,
1984 vect_memory_access_type *memory_access_type,
1985 poly_int64 *poffset,
1986 dr_alignment_support *alignment_support_scheme,
1987 int *misalignment,
1988 gather_scatter_info *gs_info,
1989 internal_fn *lanes_ifn)
1990 {
1991 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1992 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1993 stmt_vec_info first_stmt_info;
1994 unsigned int group_size;
1995 unsigned HOST_WIDE_INT gap;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1997 {
1998 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1999 group_size = DR_GROUP_SIZE (first_stmt_info);
2000 gap = DR_GROUP_GAP (first_stmt_info);
2001 }
2002 else
2003 {
2004 first_stmt_info = stmt_info;
2005 group_size = 1;
2006 gap = 0;
2007 }
2008 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2009 bool single_element_p = (stmt_info == first_stmt_info
2010 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2011 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2012
2013 /* True if the vectorized statements would access beyond the last
2014 statement in the group. */
2015 bool overrun_p = false;
2016
2017 /* True if we can cope with such overrun by peeling for gaps, so that
2018 there is at least one final scalar iteration after the vector loop. */
2019 bool can_overrun_p = (!masked_p
2020 && vls_type == VLS_LOAD
2021 && loop_vinfo
2022 && !loop->inner);
2023
2024 /* There can only be a gap at the end of the group if the stride is
2025 known at compile time. */
2026 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2027
2028 /* Stores can't yet have gaps. */
2029 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2030
2031 if (slp_node)
2032 {
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2036 first_dr_info
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2039 {
2040 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 if (multiple_p (nunits, group_size))
2044 *memory_access_type = VMAT_STRIDED_SLP;
2045 else
2046 *memory_access_type = VMAT_ELEMENTWISE;
2047 }
2048 else
2049 {
2050 overrun_p = loop_vinfo && gap != 0;
2051 if (overrun_p && vls_type != VLS_LOAD)
2052 {
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054 "Grouped store with gaps requires"
2055 " non-consecutive accesses\n");
2056 return false;
2057 }
2058 /* An overrun is fine if the trailing elements are smaller
2059 than the alignment boundary B. Every vector access will
2060 be a multiple of B and so we are guaranteed to access a
2061 non-gap element in the same B-sized block. */
2062 if (overrun_p
2063 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2064 vectype)
2065 / vect_get_scalar_dr_size (first_dr_info)))
2066 overrun_p = false;
2067
2068 /* If the gap splits the vector in half and the target
2069 can do half-vector operations avoid the epilogue peeling
2070 by simply loading half of the vector only. Usually
2071 the construction with an upper zero half will be elided. */
2072 dr_alignment_support alss;
2073 int misalign = dr_misalignment (first_dr_info, vectype);
2074 tree half_vtype;
2075 if (overrun_p
2076 && !masked_p
2077 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2078 vectype, misalign)))
2079 == dr_aligned
2080 || alss == dr_unaligned_supported)
2081 && known_eq (nunits, (group_size - gap) * 2)
2082 && known_eq (nunits, group_size)
2083 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2084 != NULL_TREE))
2085 overrun_p = false;
2086
2087 if (overrun_p && !can_overrun_p)
2088 {
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "Peeling for outer loop is not supported\n");
2092 return false;
2093 }
2094 int cmp = compare_step_with_zero (vinfo, stmt_info);
2095 if (cmp < 0)
2096 {
2097 if (single_element_p)
2098 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2099 only correct for single element "interleaving" SLP. */
2100 *memory_access_type = get_negative_load_store_type
2101 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2102 else
2103 {
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (nunits, group_size))
2108 *memory_access_type = VMAT_STRIDED_SLP;
2109 else
2110 *memory_access_type = VMAT_ELEMENTWISE;
2111 }
2112 }
2113 else if (cmp == 0 && loop_vinfo)
2114 {
2115 gcc_assert (vls_type == VLS_LOAD);
2116 *memory_access_type = VMAT_INVARIANT;
2117 /* Invariant accesses perform only component accesses, alignment
2118 is irrelevant for them. */
2119 *alignment_support_scheme = dr_unaligned_supported;
2120 }
2121 else
2122 *memory_access_type = VMAT_CONTIGUOUS;
2123
2124 /* When we have a contiguous access across loop iterations
2125 but the access in the loop doesn't cover the full vector
2126 we can end up with no gap recorded but still excess
2127 elements accessed, see PR103116. Make sure we peel for
2128 gaps if necessary and sufficient and give up if not.
2129
2130 If there is a combination of the access not covering the full
2131 vector and a gap recorded then we may need to peel twice. */
2132 if (loop_vinfo
2133 && *memory_access_type == VMAT_CONTIGUOUS
2134 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2135 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2136 nunits))
2137 {
2138 unsigned HOST_WIDE_INT cnunits, cvf;
2139 if (!can_overrun_p
2140 || !nunits.is_constant (&cnunits)
2141 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2142 /* Peeling for gaps assumes that a single scalar iteration
2143 is enough to make sure the last vector iteration doesn't
2144 access excess elements.
2145 ??? Enhancements include peeling multiple iterations
2146 or using masked loads with a static mask. */
2147 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2148 {
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2151 "peeling for gaps insufficient for "
2152 "access\n");
2153 return false;
2154 }
2155 overrun_p = true;
2156 }
2157 }
2158 }
2159 else
2160 {
2161 /* We can always handle this case using elementwise accesses,
2162 but see if something more efficient is available. */
2163 *memory_access_type = VMAT_ELEMENTWISE;
2164
2165 /* If there is a gap at the end of the group then these optimizations
2166 would access excess elements in the last iteration. */
2167 bool would_overrun_p = (gap != 0);
2168 /* An overrun is fine if the trailing elements are smaller than the
2169 alignment boundary B. Every vector access will be a multiple of B
2170 and so we are guaranteed to access a non-gap element in the
2171 same B-sized block. */
2172 if (would_overrun_p
2173 && !masked_p
2174 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2175 / vect_get_scalar_dr_size (first_dr_info)))
2176 would_overrun_p = false;
2177
2178 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2179 && (can_overrun_p || !would_overrun_p)
2180 && compare_step_with_zero (vinfo, stmt_info) > 0)
2181 {
2182 /* First cope with the degenerate case of a single-element
2183 vector. */
2184 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2185 ;
2186
2187 else
2188 {
2189 /* Otherwise try using LOAD/STORE_LANES. */
2190 *lanes_ifn
2191 = vls_type == VLS_LOAD
2192 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2193 : vect_store_lanes_supported (vectype, group_size,
2194 masked_p);
2195 if (*lanes_ifn != IFN_LAST)
2196 {
2197 *memory_access_type = VMAT_LOAD_STORE_LANES;
2198 overrun_p = would_overrun_p;
2199 }
2200
2201 /* If that fails, try using permuting loads. */
2202 else if (vls_type == VLS_LOAD
2203 ? vect_grouped_load_supported (vectype,
2204 single_element_p,
2205 group_size)
2206 : vect_grouped_store_supported (vectype, group_size))
2207 {
2208 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2209 overrun_p = would_overrun_p;
2210 }
2211 }
2212 }
2213
2214 /* As a last resort, try using a gather load or scatter store.
2215
2216 ??? Although the code can handle all group sizes correctly,
2217 it probably isn't a win to use separate strided accesses based
2218 on nearby locations. Or, even if it's a win over scalar code,
2219 it might not be a win over vectorizing at a lower VF, if that
2220 allows us to use contiguous accesses. */
2221 if (*memory_access_type == VMAT_ELEMENTWISE
2222 && single_element_p
2223 && loop_vinfo
2224 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2225 masked_p, gs_info))
2226 *memory_access_type = VMAT_GATHER_SCATTER;
2227 }
2228
2229 if (*memory_access_type == VMAT_GATHER_SCATTER
2230 || *memory_access_type == VMAT_ELEMENTWISE)
2231 {
2232 *alignment_support_scheme = dr_unaligned_supported;
2233 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2234 }
2235 else
2236 {
2237 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2238 *alignment_support_scheme
2239 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2240 *misalignment);
2241 }
2242
2243 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2244 {
2245 /* STMT is the leader of the group. Check the operands of all the
2246 stmts of the group. */
2247 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2248 while (next_stmt_info)
2249 {
2250 tree op = vect_get_store_rhs (next_stmt_info);
2251 enum vect_def_type dt;
2252 if (!vect_is_simple_use (op, vinfo, &dt))
2253 {
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2256 "use not simple.\n");
2257 return false;
2258 }
2259 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2260 }
2261 }
2262
2263 if (overrun_p)
2264 {
2265 gcc_assert (can_overrun_p);
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2268 "Data access with gaps requires scalar "
2269 "epilogue loop\n");
2270 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2271 }
2272
2273 return true;
2274 }
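
/* Worked example for the PR103116 check above (editor's illustration,
   numbers are hypothetical): with GROUP_SIZE == 3, GAP == 0, a load
   permutation, NUNITS == 4 and VF == 2, each vector load reads 4
   elements but a vector iteration only covers 3 * 2 == 6 group elements,
   so the last load can read past the group. Since
   (3 * 2) % 4 + 3 - 0 == 5 >= 4, a single peeled scalar iteration is
   enough and the access is accepted with peeling for gaps; if the
   inequality failed, the access would be rejected instead. */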
2275
2276 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2277 if there is a memory access type that the vectorized form can use,
2278 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2279 or scatters, fill in GS_INFO accordingly. In addition
2280 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2281 the target does not support the alignment scheme. *MISALIGNMENT
2282 is set according to the alignment of the access (including
2283 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2284
2285 SLP_NODE indicates whether we're performing SLP rather than loop vectorization.
2286 MASKED_P is true if the statement is conditional on a vectorized mask.
2287 VECTYPE is the vector type that the vectorized statements will use.
2288 NCOPIES is the number of vector statements that will be needed. */
2289
2290 static bool
2291 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2292 tree vectype, slp_tree slp_node,
2293 bool masked_p, vec_load_store_type vls_type,
2294 unsigned int ncopies,
2295 vect_memory_access_type *memory_access_type,
2296 poly_int64 *poffset,
2297 dr_alignment_support *alignment_support_scheme,
2298 int *misalignment,
2299 gather_scatter_info *gs_info,
2300 internal_fn *lanes_ifn)
2301 {
2302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2305 *poffset = 0;
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2307 {
2308 *memory_access_type = VMAT_GATHER_SCATTER;
2309 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2310 gcc_unreachable ();
2311 /* When using internal functions, we rely on pattern recognition
2312 to convert the type of the offset to the type that the target
2313 requires, with the result being a call to an internal function.
2314 If that failed for some reason (e.g. because another pattern
2315 took priority), just handle cases in which the offset already
2316 has the right type. */
2317 else if (gs_info->ifn != IFN_LAST
2318 && !is_gimple_call (stmt_info->stmt)
2319 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2320 TREE_TYPE (gs_info->offset_vectype)))
2321 {
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "%s offset requires a conversion\n",
2325 vls_type == VLS_LOAD ? "gather" : "scatter");
2326 return false;
2327 }
2328 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2329 &gs_info->offset_dt,
2330 &gs_info->offset_vectype))
2331 {
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2334 "%s index use not simple.\n",
2335 vls_type == VLS_LOAD ? "gather" : "scatter");
2336 return false;
2337 }
2338 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2339 {
2340 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2341 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2342 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2343 (gs_info->offset_vectype),
2344 TYPE_VECTOR_SUBPARTS (vectype)))
2345 {
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "unsupported vector types for emulated "
2349 "gather.\n");
2350 return false;
2351 }
2352 }
2353 /* Gather-scatter accesses perform only component accesses, alignment
2354 is irrelevant for them. */
2355 *alignment_support_scheme = dr_unaligned_supported;
2356 }
2357 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2358 {
2359 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2360 masked_p,
2361 vls_type, memory_access_type, poffset,
2362 alignment_support_scheme,
2363 misalignment, gs_info, lanes_ifn))
2364 return false;
2365 }
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2367 {
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2377 }
2378 else
2379 {
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp == 0)
2382 {
2383 gcc_assert (vls_type == VLS_LOAD);
2384 *memory_access_type = VMAT_INVARIANT;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme = dr_unaligned_supported;
2388 }
2389 else
2390 {
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2394 else
2395 *memory_access_type = VMAT_CONTIGUOUS;
2396 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2397 vectype, *poffset);
2398 *alignment_support_scheme
2399 = vect_supportable_dr_alignment (vinfo,
2400 STMT_VINFO_DR_INFO (stmt_info),
2401 vectype, *misalignment);
2402 }
2403 }
2404
2405 if ((*memory_access_type == VMAT_ELEMENTWISE
2406 || *memory_access_type == VMAT_STRIDED_SLP)
2407 && !nunits.is_constant ())
2408 {
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "Not using elementwise accesses due to variable "
2412 "vectorization factor.\n");
2413 return false;
2414 }
2415
2416 if (*alignment_support_scheme == dr_unaligned_unsupported)
2417 {
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "unsupported unaligned access\n");
2421 return false;
2422 }
2423
2424 /* FIXME: At the moment the cost model seems to underestimate the
2425 cost of using elementwise accesses. This check preserves the
2426 traditional behavior until that can be fixed. */
2427 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2428 if (!first_stmt_info)
2429 first_stmt_info = stmt_info;
2430 if (*memory_access_type == VMAT_ELEMENTWISE
2431 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2432 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2433 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2434 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2435 {
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "not falling back to elementwise accesses\n");
2439 return false;
2440 }
2441 return true;
2442 }
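
/* Quick reference (editor's summary of the classification above): gather
   and scatter statements stay VMAT_GATHER_SCATTER; grouped or SLP
   accesses are classified by get_group_load_store_type; "a[i] = b[i]"
   has a positive constant step and becomes VMAT_CONTIGUOUS;
   "a[i] = b[n - i]" has a negative step and is classified by
   get_negative_load_store_type; a load whose address does not change in
   the loop has a zero step and becomes VMAT_INVARIANT; and strided
   accesses become either VMAT_GATHER_SCATTER or VMAT_ELEMENTWISE. */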
2443
2444 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2445 conditional operation STMT_INFO. When returning true, store the mask
2446 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2447 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2448 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2449
2450 static bool
2451 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2452 slp_tree slp_node, unsigned mask_index,
2453 tree *mask, slp_tree *mask_node,
2454 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2455 {
2456 enum vect_def_type mask_dt;
2457 tree mask_vectype;
2458 slp_tree mask_node_1;
2459 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2460 mask, &mask_node_1, &mask_dt, &mask_vectype))
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "mask use not simple.\n");
2465 return false;
2466 }
2467
2468 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2469 {
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "mask argument is not a boolean.\n");
2473 return false;
2474 }
2475
2476 /* If the caller is not prepared for adjusting an external/constant
2477 SLP mask vector type, fail. */
2478 if (slp_node
2479 && !mask_node
2480 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2481 {
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "SLP mask argument is not vectorized.\n");
2485 return false;
2486 }
2487
2488 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 if (!mask_vectype)
2490 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2491 mask_node_1);
2492
2493 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2494 {
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2497 "could not find an appropriate vector mask type.\n");
2498 return false;
2499 }
2500
2501 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2502 TYPE_VECTOR_SUBPARTS (vectype)))
2503 {
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "vector mask type %T"
2507 " does not match vector data type %T.\n",
2508 mask_vectype, vectype);
2509
2510 return false;
2511 }
2512
2513 *mask_dt_out = mask_dt;
2514 *mask_vectype_out = mask_vectype;
2515 if (mask_node)
2516 *mask_node = mask_node_1;
2517 return true;
2518 }
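
/* For example (editor's note): when a conditional load "x = c_5 ? *p : 0"
   is vectorized as .MASK_LOAD, the boolean c_5 is the scalar mask checked
   here. With a V4SI data vectype the mask vectype must also have four
   elements; a mask vectype with a different number of elements is
   rejected above. */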
2519
2520 /* Return true if stored value is suitable for vectorizing store
2521 statement STMT_INFO. When returning true, store the scalar stored
2522 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2523 the type of the vectorized store value in
2524 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2525
2526 static bool
2527 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2528 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2529 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2530 vec_load_store_type *vls_type_out)
2531 {
2532 int op_no = 0;
2533 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2534 {
2535 if (gimple_call_internal_p (call)
2536 && internal_store_fn_p (gimple_call_internal_fn (call)))
2537 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2538 }
2539 if (slp_node)
2540 op_no = vect_slp_child_index_for_operand
2541 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2542
2543 enum vect_def_type rhs_dt;
2544 tree rhs_vectype;
2545 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2546 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2547 {
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "use not simple.\n");
2551 return false;
2552 }
2553
2554 /* In case this is a store from a constant, make sure
2555 native_encode_expr can handle it. */
2556 if (rhs_dt == vect_constant_def
2557 && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2558 {
2559 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "cannot encode constant as a byte sequence.\n");
2562 return false;
2563 }
2564
2565 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2566 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2567 {
2568 if (dump_enabled_p ())
2569 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2570 "incompatible vector types.\n");
2571 return false;
2572 }
2573
2574 *rhs_dt_out = rhs_dt;
2575 *rhs_vectype_out = rhs_vectype;
2576 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2577 *vls_type_out = VLS_STORE_INVARIANT;
2578 else
2579 *vls_type_out = VLS_STORE;
2580 return true;
2581 }
2582
2583 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2584 Note that we support masks with floating-point type, in which case the
2585 floats are interpreted as a bitmask. */
2586
2587 static tree
2588 vect_build_all_ones_mask (vec_info *vinfo,
2589 stmt_vec_info stmt_info, tree masktype)
2590 {
2591 if (TREE_CODE (masktype) == INTEGER_TYPE)
2592 return build_int_cst (masktype, -1);
2593 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2594 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2595 {
2596 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2597 mask = build_vector_from_val (masktype, mask);
2598 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2599 }
2600 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2601 {
2602 REAL_VALUE_TYPE r;
2603 long tmp[6];
2604 for (int j = 0; j < 6; ++j)
2605 tmp[j] = -1;
2606 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2607 tree mask = build_real (TREE_TYPE (masktype), r);
2608 mask = build_vector_from_val (masktype, mask);
2609 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2610 }
2611 gcc_unreachable ();
2612 }
2613
2614 /* Build an all-zero merge value of type VECTYPE while vectorizing
2615 STMT_INFO as a gather load. */
2616
2617 static tree
2618 vect_build_zero_merge_argument (vec_info *vinfo,
2619 stmt_vec_info stmt_info, tree vectype)
2620 {
2621 tree merge;
2622 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2623 merge = build_int_cst (TREE_TYPE (vectype), 0);
2624 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2625 {
2626 REAL_VALUE_TYPE r;
2627 long tmp[6];
2628 for (int j = 0; j < 6; ++j)
2629 tmp[j] = 0;
2630 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2631 merge = build_real (TREE_TYPE (vectype), r);
2632 }
2633 else
2634 gcc_unreachable ();
2635 merge = build_vector_from_val (vectype, merge);
2636 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2637 }
2638
2639 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2640 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2641 the gather load operation. If the load is conditional, MASK is the
2642 vectorized condition, otherwise MASK is null. PTR is the base
2643 pointer and OFFSET is the vectorized offset. */
2644
2645 static gimple *
2646 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2647 gimple_stmt_iterator *gsi,
2648 gather_scatter_info *gs_info,
2649 tree ptr, tree offset, tree mask)
2650 {
2651 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2652 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2653 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2654 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2655 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2656 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2657 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2658 tree scaletype = TREE_VALUE (arglist);
2659 tree var;
2660 gcc_checking_assert (types_compatible_p (srctype, rettype)
2661 && (!mask
2662 || TREE_CODE (masktype) == INTEGER_TYPE
2663 || types_compatible_p (srctype, masktype)));
2664
2665 tree op = offset;
2666 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2667 {
2668 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2669 TYPE_VECTOR_SUBPARTS (idxtype)));
2670 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2671 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2672 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2673 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2674 op = var;
2675 }
2676
2677 tree src_op = NULL_TREE;
2678 tree mask_op = NULL_TREE;
2679 if (mask)
2680 {
2681 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2682 {
2683 tree utype, optype = TREE_TYPE (mask);
2684 if (VECTOR_TYPE_P (masktype)
2685 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2686 utype = masktype;
2687 else
2688 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2689 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2690 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2691 gassign *new_stmt
2692 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2693 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2694 mask_arg = var;
2695 if (!useless_type_conversion_p (masktype, utype))
2696 {
2697 gcc_assert (TYPE_PRECISION (utype)
2698 <= TYPE_PRECISION (masktype));
2699 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2700 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2701 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2702 mask_arg = var;
2703 }
2704 src_op = build_zero_cst (srctype);
2705 mask_op = mask_arg;
2706 }
2707 else
2708 {
2709 src_op = mask;
2710 mask_op = mask;
2711 }
2712 }
2713 else
2714 {
2715 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2716 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2717 }
2718
2719 tree scale = build_int_cst (scaletype, gs_info->scale);
2720 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2721 mask_op, scale);
2722
2723 if (!useless_type_conversion_p (vectype, rettype))
2724 {
2725 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2726 TYPE_VECTOR_SUBPARTS (rettype)));
2727 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2728 gimple_call_set_lhs (new_stmt, op);
2729 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2730 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2731 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2732 }
2733
2734 return new_stmt;
2735 }
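
/* Editor's note: this path is only used for builtin-based gathers
   (GS_INFO->DECL), e.g. the x86 AVX2 gather builtins, whose prototypes
   follow the (src, base pointer, index vector, mask, scale) shape of the
   five-argument call built above; the VIEW_CONVERT_EXPRs reconcile the
   vector types chosen by the vectorizer with the builtin's prototype. */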
2736
2737 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2738 instructions before GSI. GS_INFO describes the scatter store operation.
2739 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2740 vectorized data to store.
2741 If the store is conditional, MASK is the vectorized condition, otherwise
2742 MASK is null. */
2743
2744 static gimple *
2745 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2746 gimple_stmt_iterator *gsi,
2747 gather_scatter_info *gs_info,
2748 tree ptr, tree offset, tree oprnd, tree mask)
2749 {
2750 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2751 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2752 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2753 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2754 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2755 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2756 tree scaletype = TREE_VALUE (arglist);
2757 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2758 && TREE_CODE (rettype) == VOID_TYPE);
2759
2760 tree mask_arg = NULL_TREE;
2761 if (mask)
2762 {
2763 mask_arg = mask;
2764 tree optype = TREE_TYPE (mask_arg);
2765 tree utype;
2766 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2767 utype = masktype;
2768 else
2769 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2770 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2771 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2772 gassign *new_stmt
2773 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2774 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2775 mask_arg = var;
2776 if (!useless_type_conversion_p (masktype, utype))
2777 {
2778 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2779 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2780 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2781 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2782 mask_arg = var;
2783 }
2784 }
2785 else
2786 {
2787 mask_arg = build_int_cst (masktype, -1);
2788 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2789 }
2790
2791 tree src = oprnd;
2792 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2793 {
2794 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2795 TYPE_VECTOR_SUBPARTS (srctype)));
2796 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2797 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2798 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2799 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2800 src = var;
2801 }
2802
2803 tree op = offset;
2804 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2805 {
2806 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2807 TYPE_VECTOR_SUBPARTS (idxtype)));
2808 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2809 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2810 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2812 op = var;
2813 }
2814
2815 tree scale = build_int_cst (scaletype, gs_info->scale);
2816 gcall *new_stmt
2817 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2818 return new_stmt;
2819 }
2820
2821 /* Prepare the base and offset in GS_INFO for vectorization.
2822 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2823 to the vectorized offset argument for the first copy of STMT_INFO.
2824 STMT_INFO is the statement described by GS_INFO and LOOP is the
2825 containing loop. */
2826
2827 static void
2828 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2829 class loop *loop, stmt_vec_info stmt_info,
2830 slp_tree slp_node, gather_scatter_info *gs_info,
2831 tree *dataref_ptr, vec<tree> *vec_offset)
2832 {
2833 gimple_seq stmts = NULL;
2834 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2835 if (stmts != NULL)
2836 {
2837 basic_block new_bb;
2838 edge pe = loop_preheader_edge (loop);
2839 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2840 gcc_assert (!new_bb);
2841 }
2842 if (slp_node)
2843 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2844 else
2845 {
2846 unsigned ncopies
2847 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2848 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2849 gs_info->offset, vec_offset,
2850 gs_info->offset_vectype);
2851 }
2852 }
2853
2854 /* Prepare to implement a grouped or strided load or store using
2855 the gather load or scatter store operation described by GS_INFO.
2856 STMT_INFO is the load or store statement.
2857
2858 Set *DATAREF_BUMP to the amount that should be added to the base
2859 address after each copy of the vectorized statement. Set *VEC_OFFSET
2860 to an invariant offset vector in which element I has the value
2861 I * DR_STEP / SCALE. */
2862
2863 static void
2864 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2865 loop_vec_info loop_vinfo,
2866 gimple_stmt_iterator *gsi,
2867 gather_scatter_info *gs_info,
2868 tree *dataref_bump, tree *vec_offset,
2869 vec_loop_lens *loop_lens)
2870 {
2871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873
2874 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2875 {
2876 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2877 ivtmp_8 = _31 * 16 (step in bytes);
2878 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2879 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2880 tree loop_len
2881 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2882 tree tmp
2883 = fold_build2 (MULT_EXPR, sizetype,
2884 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2885 loop_len);
2886 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2887 GSI_SAME_STMT);
2888 }
2889 else
2890 {
2891 tree bump
2892 = size_binop (MULT_EXPR,
2893 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2894 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2895 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2896 }
2897
2898 /* The offset given in GS_INFO can have pointer type, so use the element
2899 type of the vector instead. */
2900 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2901
2902 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2903 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2904 ssize_int (gs_info->scale));
2905 step = fold_convert (offset_type, step);
2906
2907 /* Create {0, X, X*2, X*3, ...}. */
2908 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2909 build_zero_cst (offset_type), step);
2910 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2911 }
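
/* Worked example (editor's illustration): for a strided access with
   DR_STEP == 32 bytes and a gather/scatter SCALE of 8, X is 32 / 8 == 4
   and *VEC_OFFSET becomes the invariant series { 0, 4, 8, 12, ... } built
   with VEC_SERIES_EXPR. *DATAREF_BUMP is DR_STEP times the number of
   lanes, or DR_STEP times the .SELECT_VL result when variable-length
   partial vectors are in use. */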
2912
2913 /* Prepare the pointer IVs which need to be updated by a variable amount.
2914 That variable amount is the outcome of .SELECT_VL. In this case each
2915 iteration may process a flexible number of elements, as long as that
2916 number is <= VF elements.
2917
2918 Return the data-reference increment computed from SELECT_VL.
2919 If new statements are needed, insert them before GSI. */
2920
2921 static tree
2922 vect_get_loop_variant_data_ptr_increment (
2923 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2924 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2925 vect_memory_access_type memory_access_type)
2926 {
2927 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2928 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2929
2930 /* gather/scatter never reach here. */
2931 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2932
2933 /* When the SELECT_VL pattern is in use, we adjust
2934 the memory address dynamically by the .SELECT_VL result.
2935
2936 The result of .SELECT_VL is the number of elements to
2937 be processed in each iteration. So the memory address
2938 adjustment operation should be:
2939
2940 addr = addr + .SELECT_VL (ARG..) * step;
2941 */
2942 tree loop_len
2943 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2944 tree len_type = TREE_TYPE (loop_len);
2945 /* Since the outcome of .SELECT_VL is an element count, scale it by the
2946 step in bytes so that it can be used to bump the address pointer IVs
2947 by a variable amount. */
2948 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2949 wide_int_to_tree (len_type, wi::to_widest (step)));
2950 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
2951 gassign *assign = gimple_build_assign (bump, tmp);
2952 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2953 return bump;
2954 }
2955
2956 /* Return the amount that should be added to a vector pointer to move
2957 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2958 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2959 vectorization. */
2960
2961 static tree
2962 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2963 dr_vec_info *dr_info, tree aggr_type,
2964 vect_memory_access_type memory_access_type,
2965 vec_loop_lens *loop_lens = nullptr)
2966 {
2967 if (memory_access_type == VMAT_INVARIANT)
2968 return size_zero_node;
2969
2970 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2971 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2972 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2973 loop_lens, dr_info,
2974 memory_access_type);
2975
2976 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2977 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2978 if (tree_int_cst_sgn (step) == -1)
2979 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2980 return iv_step;
2981 }
2982
2983 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2984
2985 static bool
2986 vectorizable_bswap (vec_info *vinfo,
2987 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2988 gimple **vec_stmt, slp_tree slp_node,
2989 slp_tree *slp_op,
2990 tree vectype_in, stmt_vector_for_cost *cost_vec)
2991 {
2992 tree op, vectype;
2993 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2994 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2995 unsigned ncopies;
2996
2997 op = gimple_call_arg (stmt, 0);
2998 vectype = STMT_VINFO_VECTYPE (stmt_info);
2999 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3000
3001 /* Multiple types in SLP are handled by creating the appropriate number of
3002 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3003 case of SLP. */
3004 if (slp_node)
3005 ncopies = 1;
3006 else
3007 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3008
3009 gcc_assert (ncopies >= 1);
3010
3011 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3012 {
3013 if (dump_enabled_p ())
3014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3015 "mismatched vector sizes %T and %T\n",
3016 vectype_in, vectype);
3017 return false;
3018 }
3019
3020 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3021 if (! char_vectype)
3022 return false;
3023
3024 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3025 unsigned word_bytes;
3026 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3027 return false;
3028
3029 /* The encoding uses one stepped pattern for each byte in the word. */
3030 vec_perm_builder elts (num_bytes, word_bytes, 3);
3031 for (unsigned i = 0; i < 3; ++i)
3032 for (unsigned j = 0; j < word_bytes; ++j)
3033 elts.quick_push ((i + 1) * word_bytes - j - 1);
3034
3035 vec_perm_indices indices (elts, 1, num_bytes);
3036 machine_mode vmode = TYPE_MODE (char_vectype);
3037 if (!can_vec_perm_const_p (vmode, vmode, indices))
3038 return false;
3039
3040 if (! vec_stmt)
3041 {
3042 if (slp_node
3043 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3044 {
3045 if (dump_enabled_p ())
3046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3047 "incompatible vector types for invariants\n");
3048 return false;
3049 }
3050
3051 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3052 DUMP_VECT_SCOPE ("vectorizable_bswap");
3053 record_stmt_cost (cost_vec,
3054 1, vector_stmt, stmt_info, 0, vect_prologue);
3055 record_stmt_cost (cost_vec,
3056 slp_node
3057 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3058 vec_perm, stmt_info, 0, vect_body);
3059 return true;
3060 }
3061
3062 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3063
3064 /* Transform. */
3065 vec<tree> vec_oprnds = vNULL;
3066 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3067 op, &vec_oprnds);
3068 /* Arguments are ready. Create the new vector stmt. */
3069 unsigned i;
3070 tree vop;
3071 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3072 {
3073 gimple *new_stmt;
3074 tree tem = make_ssa_name (char_vectype);
3075 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3076 char_vectype, vop));
3077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3078 tree tem2 = make_ssa_name (char_vectype);
3079 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3080 tem, tem, bswap_vconst);
3081 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3082 tem = make_ssa_name (vectype);
3083 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3084 vectype, tem2));
3085 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3086 if (slp_node)
3087 slp_node->push_vec_def (new_stmt);
3088 else
3089 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3090 }
3091
3092 if (!slp_node)
3093 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3094
3095 vec_oprnds.release ();
3096 return true;
3097 }
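
/* Worked example (editor's illustration): for __builtin_bswap32 with a
   V4SI vectype the char vectype is V16QI, WORD_BYTES is 4 and the
   selector built above is { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, so each vector statement becomes a VIEW_CONVERT_EXPR
   to V16QI, one VEC_PERM_EXPR with that selector and a VIEW_CONVERT_EXPR
   back to V4SI. */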
3098
3099 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3100 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3101 in a single step. On success, store the binary pack code in
3102 *CONVERT_CODE. */
3103
3104 static bool
3105 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3106 code_helper *convert_code)
3107 {
3108 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3109 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3110 return false;
3111
3112 code_helper code;
3113 int multi_step_cvt = 0;
3114 auto_vec <tree, 8> interm_types;
3115 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3116 &code, &multi_step_cvt, &interm_types)
3117 || multi_step_cvt)
3118 return false;
3119
3120 *convert_code = code;
3121 return true;
3122 }
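
/* For example (editor's note): a call whose scalar result is half the
   width of its argument can have VECTYPE_IN V4SI and VECTYPE_OUT V8HI;
   if the target can narrow V4SI to V8HI in a single step (e.g. via
   VEC_PACK_TRUNC_EXPR), that pack code is returned in *CONVERT_CODE,
   while multi-step narrowing is rejected here. */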
3123
3124 /* Function vectorizable_call.
3125
3126 Check if STMT_INFO performs a function call that can be vectorized.
3127 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3128 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3129 Return true if STMT_INFO is vectorizable in this way. */
3130
3131 static bool
3132 vectorizable_call (vec_info *vinfo,
3133 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3134 gimple **vec_stmt, slp_tree slp_node,
3135 stmt_vector_for_cost *cost_vec)
3136 {
3137 gcall *stmt;
3138 tree vec_dest;
3139 tree scalar_dest;
3140 tree op;
3141 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3142 tree vectype_out, vectype_in;
3143 poly_uint64 nunits_in;
3144 poly_uint64 nunits_out;
3145 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3146 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3147 tree fndecl, new_temp, rhs_type;
3148 enum vect_def_type dt[4]
3149 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3150 vect_unknown_def_type };
3151 tree vectypes[ARRAY_SIZE (dt)] = {};
3152 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3153 int ndts = ARRAY_SIZE (dt);
3154 int ncopies, j;
3155 auto_vec<tree, 8> vargs;
3156 enum { NARROW, NONE, WIDEN } modifier;
3157 size_t i, nargs;
3158 tree lhs;
3159 tree clz_ctz_arg1 = NULL_TREE;
3160
3161 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3162 return false;
3163
3164 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3165 && ! vec_stmt)
3166 return false;
3167
3168 /* Is STMT_INFO a vectorizable call? */
3169 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3170 if (!stmt)
3171 return false;
3172
3173 if (gimple_call_internal_p (stmt)
3174 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3175 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3176 /* Handled by vectorizable_load and vectorizable_store. */
3177 return false;
3178
3179 if (gimple_call_lhs (stmt) == NULL_TREE
3180 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3181 return false;
3182
3183 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3184
3185 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3186
3187 /* Process function arguments. */
3188 rhs_type = NULL_TREE;
3189 vectype_in = NULL_TREE;
3190 nargs = gimple_call_num_args (stmt);
3191
3192 /* Bail out if the function has more than four arguments; we do not have
3193 interesting builtin functions to vectorize with more than two arguments
3194 except for fma. Zero arguments is not useful either. */
3195 if (nargs == 0 || nargs > 4)
3196 return false;
3197
3198 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3199 combined_fn cfn = gimple_call_combined_fn (stmt);
3200 if (cfn == CFN_GOMP_SIMD_LANE)
3201 {
3202 nargs = 0;
3203 rhs_type = unsigned_type_node;
3204 }
3205 /* Similarly, pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3206 argument just says whether the operation is well-defined at zero and what
3207 value should be returned for it. */
3208 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3209 {
3210 nargs = 1;
3211 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3212 }
3213
3214 int mask_opno = -1;
3215 if (internal_fn_p (cfn))
3216 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3217
3218 for (i = 0; i < nargs; i++)
3219 {
3220 if ((int) i == mask_opno)
3221 {
3222 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3223 &op, &slp_op[i], &dt[i], &vectypes[i]))
3224 return false;
3225 continue;
3226 }
3227
3228 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3229 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3230 {
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3233 "use not simple.\n");
3234 return false;
3235 }
3236
3237 /* We can only handle calls with arguments of the same type. */
3238 if (rhs_type
3239 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3240 {
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3243 "argument types differ.\n");
3244 return false;
3245 }
3246 if (!rhs_type)
3247 rhs_type = TREE_TYPE (op);
3248
3249 if (!vectype_in)
3250 vectype_in = vectypes[i];
3251 else if (vectypes[i]
3252 && !types_compatible_p (vectypes[i], vectype_in))
3253 {
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3256 "argument vector types differ.\n");
3257 return false;
3258 }
3259 }
3260 /* If all arguments are external or constant defs, infer the vector type
3261 from the scalar type. */
3262 if (!vectype_in)
3263 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3264 if (vec_stmt)
3265 gcc_assert (vectype_in);
3266 if (!vectype_in)
3267 {
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3270 "no vectype for scalar type %T\n", rhs_type);
3271
3272 return false;
3273 }
3274
3275 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3276 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3277 {
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3280 "mixed mask and nonmask vector types\n");
3281 return false;
3282 }
3283
3284 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3285 {
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288 "use emulated vector type for call\n");
3289 return false;
3290 }
3291
3292 /* FORNOW */
3293 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3294 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3295 if (known_eq (nunits_in * 2, nunits_out))
3296 modifier = NARROW;
3297 else if (known_eq (nunits_out, nunits_in))
3298 modifier = NONE;
3299 else if (known_eq (nunits_out * 2, nunits_in))
3300 modifier = WIDEN;
3301 else
3302 return false;
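 /* E.g. equal lane counts in and out (V4SI in, V4SF out) give NONE, twice
    as many input lanes as output lanes (V8HI in, V4SI out) give WIDEN,
    and twice as many output lanes (V4SI in, V8HI out) give NARROW.  */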
3303
3304 /* We only handle functions that do not read or clobber memory. */
3305 if (gimple_vuse (stmt))
3306 {
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3309 "function reads from or writes to memory.\n");
3310 return false;
3311 }
3312
3313 /* For now, we only vectorize functions if a target-specific builtin
3314 is available. TODO -- in some cases, it might be profitable to
3315 insert the calls for pieces of the vector, in order to be able
3316 to vectorize other operations in the loop. */
3317 fndecl = NULL_TREE;
3318 internal_fn ifn = IFN_LAST;
3319 tree callee = gimple_call_fndecl (stmt);
3320
3321 /* First try using an internal function. */
3322 code_helper convert_code = MAX_TREE_CODES;
3323 if (cfn != CFN_LAST
3324 && (modifier == NONE
3325 || (modifier == NARROW
3326 && simple_integer_narrowing (vectype_out, vectype_in,
3327 &convert_code))))
3328 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3329 vectype_in);
3330
3331 /* If that fails, try asking for a target-specific built-in function. */
3332 if (ifn == IFN_LAST)
3333 {
3334 if (cfn != CFN_LAST)
3335 fndecl = targetm.vectorize.builtin_vectorized_function
3336 (cfn, vectype_out, vectype_in);
3337 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3338 fndecl = targetm.vectorize.builtin_md_vectorized_function
3339 (callee, vectype_out, vectype_in);
3340 }
3341
3342 if (ifn == IFN_LAST && !fndecl)
3343 {
3344 if (cfn == CFN_GOMP_SIMD_LANE
3345 && !slp_node
3346 && loop_vinfo
3347 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3348 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3349 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3350 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3351 {
3352 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3353 { 0, 1, 2, ... vf - 1 } vector. */
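 /* E.g. with a vectorization factor of 4 the scalar lane value is
    replaced during the transform phase below by the constant vector
    { 0, 1, 2, 3 } built via build_index_vector.  */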
3354 gcc_assert (nargs == 0);
3355 }
3356 else if (modifier == NONE
3357 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3358 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3359 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3360 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3361 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3362 slp_op, vectype_in, cost_vec);
3363 else
3364 {
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3367 "function is not vectorizable.\n");
3368 return false;
3369 }
3370 }
3371
3372 if (slp_node)
3373 ncopies = 1;
3374 else if (modifier == NARROW && ifn == IFN_LAST)
3375 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3376 else
3377 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3378
3379 /* Sanity check: make sure that at least one copy of the vectorized stmt
3380 needs to be generated. */
3381 gcc_assert (ncopies >= 1);
3382
3383 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3384 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3385 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3386 int len_opno = internal_fn_len_index (cond_len_fn);
3387 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3388 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
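 /* COND_FN and COND_LEN_FN are the conditional and length-controlled
    variants of IFN (or IFN_LAST if no such variant exists); they are
    used below when the call participates in a reduction inside a
    partially-vectorized loop.  */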
3389 if (!vec_stmt) /* transformation not required. */
3390 {
3391 if (slp_node)
3392 for (i = 0; i < nargs; ++i)
3393 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3394 vectypes[i]
3395 ? vectypes[i] : vectype_in))
3396 {
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3399 "incompatible vector types for invariants\n");
3400 return false;
3401 }
3402 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3403 DUMP_VECT_SCOPE ("vectorizable_call");
3404 vect_model_simple_cost (vinfo, stmt_info,
3405 ncopies, dt, ndts, slp_node, cost_vec);
3406 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3407 record_stmt_cost (cost_vec, ncopies / 2,
3408 vec_promote_demote, stmt_info, 0, vect_body);
3409
3410 if (loop_vinfo
3411 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3412 && (reduc_idx >= 0 || mask_opno >= 0))
3413 {
3414 if (reduc_idx >= 0
3415 && (cond_fn == IFN_LAST
3416 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3417 OPTIMIZE_FOR_SPEED))
3418 && (cond_len_fn == IFN_LAST
3419 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3420 OPTIMIZE_FOR_SPEED)))
3421 {
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3424 "can't use a fully-masked loop because no"
3425 " conditional operation is available.\n");
3426 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3427 }
3428 else
3429 {
3430 unsigned int nvectors
3431 = (slp_node
3432 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3433 : ncopies);
3434 tree scalar_mask = NULL_TREE;
3435 if (mask_opno >= 0)
3436 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3437 if (cond_len_fn != IFN_LAST
3438 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3439 OPTIMIZE_FOR_SPEED))
3440 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3441 1);
3442 else
3443 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3444 scalar_mask);
3445 }
3446 }
3447 return true;
3448 }
3449
3450 /* Transform. */
3451
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3454
3455 /* Handle def. */
3456 scalar_dest = gimple_call_lhs (stmt);
3457 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3458
3459 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3460 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3461 unsigned int vect_nargs = nargs;
3462 if (len_loop_p)
3463 {
3464 if (len_opno >= 0)
3465 {
3466 ifn = cond_len_fn;
3467 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
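 /* E.g. a conditional IFN_COND_ADD gains a LEN and a BIAS operand and
    becomes IFN_COND_LEN_ADD; LEN_OPNO gives the position of LEN.  */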
3468 vect_nargs += 2;
3469 }
3470 else if (reduc_idx >= 0)
3471 gcc_unreachable ();
3472 }
3473 else if (masked_loop_p && reduc_idx >= 0)
3474 {
3475 ifn = cond_fn;
3476 vect_nargs += 2;
3477 }
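 /* The IFN_CLZ/IFN_CTZ second argument that was dropped during analysis
    is re-appended to every vectorized call built below.  */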
3478 if (clz_ctz_arg1)
3479 ++vect_nargs;
3480
3481 if (modifier == NONE || ifn != IFN_LAST)
3482 {
3483 tree prev_res = NULL_TREE;
3484 vargs.safe_grow (vect_nargs, true);
3485 auto_vec<vec<tree> > vec_defs (nargs);
3486 for (j = 0; j < ncopies; ++j)
3487 {
3488 /* Build argument list for the vectorized call. */
3489 if (slp_node)
3490 {
3491 vec<tree> vec_oprnds0;
3492
3493 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3494 vec_oprnds0 = vec_defs[0];
3495
3496 /* Arguments are ready. Create the new vector stmt. */
3497 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3498 {
3499 int varg = 0;
3500 if (masked_loop_p && reduc_idx >= 0)
3501 {
3502 unsigned int vec_num = vec_oprnds0.length ();
3503 /* Always true for SLP. */
3504 gcc_assert (ncopies == 1);
3505 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3506 gsi, masks, vec_num,
3507 vectype_out, i);
3508 }
3509 size_t k;
3510 for (k = 0; k < nargs; k++)
3511 {
3512 vec<tree> vec_oprndsk = vec_defs[k];
3513 vargs[varg++] = vec_oprndsk[i];
3514 }
3515 if (masked_loop_p && reduc_idx >= 0)
3516 vargs[varg++] = vargs[reduc_idx + 1];
3517 if (clz_ctz_arg1)
3518 vargs[varg++] = clz_ctz_arg1;
3519
3520 gimple *new_stmt;
3521 if (modifier == NARROW)
3522 {
3523 /* We don't define any narrowing conditional functions
3524 at present. */
3525 gcc_assert (mask_opno < 0);
3526 tree half_res = make_ssa_name (vectype_in);
3527 gcall *call
3528 = gimple_build_call_internal_vec (ifn, vargs);
3529 gimple_call_set_lhs (call, half_res);
3530 gimple_call_set_nothrow (call, true);
3531 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3532 if ((i & 1) == 0)
3533 {
3534 prev_res = half_res;
3535 continue;
3536 }
3537 new_temp = make_ssa_name (vec_dest);
3538 new_stmt = vect_gimple_build (new_temp, convert_code,
3539 prev_res, half_res);
3540 vect_finish_stmt_generation (vinfo, stmt_info,
3541 new_stmt, gsi);
3542 }
3543 else
3544 {
3545 if (len_opno >= 0 && len_loop_p)
3546 {
3547 unsigned int vec_num = vec_oprnds0.length ();
3548 /* Always true for SLP. */
3549 gcc_assert (ncopies == 1);
3550 tree len
3551 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3552 vectype_out, i, 1);
3553 signed char biasval
3554 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3555 tree bias = build_int_cst (intQI_type_node, biasval);
3556 vargs[len_opno] = len;
3557 vargs[len_opno + 1] = bias;
3558 }
3559 else if (mask_opno >= 0 && masked_loop_p)
3560 {
3561 unsigned int vec_num = vec_oprnds0.length ();
3562 /* Always true for SLP. */
3563 gcc_assert (ncopies == 1);
3564 tree mask = vect_get_loop_mask (loop_vinfo,
3565 gsi, masks, vec_num,
3566 vectype_out, i);
3567 vargs[mask_opno] = prepare_vec_mask
3568 (loop_vinfo, TREE_TYPE (mask), mask,
3569 vargs[mask_opno], gsi);
3570 }
3571
3572 gcall *call;
3573 if (ifn != IFN_LAST)
3574 call = gimple_build_call_internal_vec (ifn, vargs);
3575 else
3576 call = gimple_build_call_vec (fndecl, vargs);
3577 new_temp = make_ssa_name (vec_dest, call);
3578 gimple_call_set_lhs (call, new_temp);
3579 gimple_call_set_nothrow (call, true);
3580 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3581 new_stmt = call;
3582 }
3583 slp_node->push_vec_def (new_stmt);
3584 }
3585 continue;
3586 }
3587
3588 int varg = 0;
3589 if (masked_loop_p && reduc_idx >= 0)
3590 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3591 vectype_out, j);
3592 for (i = 0; i < nargs; i++)
3593 {
3594 op = gimple_call_arg (stmt, i);
3595 if (j == 0)
3596 {
3597 vec_defs.quick_push (vNULL);
3598 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3599 op, &vec_defs[i],
3600 vectypes[i]);
3601 }
3602 vargs[varg++] = vec_defs[i][j];
3603 }
3604 if (masked_loop_p && reduc_idx >= 0)
3605 vargs[varg++] = vargs[reduc_idx + 1];
3606 if (clz_ctz_arg1)
3607 vargs[varg++] = clz_ctz_arg1;
3608
3609 if (len_opno >= 0 && len_loop_p)
3610 {
3611 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3612 vectype_out, j, 1);
3613 signed char biasval
3614 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3615 tree bias = build_int_cst (intQI_type_node, biasval);
3616 vargs[len_opno] = len;
3617 vargs[len_opno + 1] = bias;
3618 }
3619 else if (mask_opno >= 0 && masked_loop_p)
3620 {
3621 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3622 vectype_out, j);
3623 vargs[mask_opno]
3624 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3625 vargs[mask_opno], gsi);
3626 }
3627
3628 gimple *new_stmt;
3629 if (cfn == CFN_GOMP_SIMD_LANE)
3630 {
3631 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3632 tree new_var
3633 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3634 gimple *init_stmt = gimple_build_assign (new_var, cst);
3635 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3636 new_temp = make_ssa_name (vec_dest);
3637 new_stmt = gimple_build_assign (new_temp, new_var);
3638 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3639 }
3640 else if (modifier == NARROW)
3641 {
3642 /* We don't define any narrowing conditional functions at
3643 present. */
3644 gcc_assert (mask_opno < 0);
3645 tree half_res = make_ssa_name (vectype_in);
3646 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3647 gimple_call_set_lhs (call, half_res);
3648 gimple_call_set_nothrow (call, true);
3649 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3650 if ((j & 1) == 0)
3651 {
3652 prev_res = half_res;
3653 continue;
3654 }
3655 new_temp = make_ssa_name (vec_dest);
3656 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3657 half_res);
3658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3659 }
3660 else
3661 {
3662 gcall *call;
3663 if (ifn != IFN_LAST)
3664 call = gimple_build_call_internal_vec (ifn, vargs);
3665 else
3666 call = gimple_build_call_vec (fndecl, vargs);
3667 new_temp = make_ssa_name (vec_dest, call);
3668 gimple_call_set_lhs (call, new_temp);
3669 gimple_call_set_nothrow (call, true);
3670 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3671 new_stmt = call;
3672 }
3673
3674 if (j == (modifier == NARROW ? 1 : 0))
3675 *vec_stmt = new_stmt;
3676 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3677 }
3678 for (i = 0; i < nargs; i++)
3679 {
3680 vec<tree> vec_oprndsi = vec_defs[i];
3681 vec_oprndsi.release ();
3682 }
3683 }
3684 else if (modifier == NARROW)
3685 {
3686 auto_vec<vec<tree> > vec_defs (nargs);
3687 /* We don't define any narrowing conditional functions at present. */
3688 gcc_assert (mask_opno < 0);
3689 for (j = 0; j < ncopies; ++j)
3690 {
3691 /* Build argument list for the vectorized call. */
3692 if (j == 0)
3693 vargs.create (nargs * 2);
3694 else
3695 vargs.truncate (0);
3696
3697 if (slp_node)
3698 {
3699 vec<tree> vec_oprnds0;
3700
3701 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3702 vec_oprnds0 = vec_defs[0];
3703
3704 /* Arguments are ready. Create the new vector stmt. */
3705 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3706 {
3707 size_t k;
3708 vargs.truncate (0);
3709 for (k = 0; k < nargs; k++)
3710 {
3711 vec<tree> vec_oprndsk = vec_defs[k];
3712 vargs.quick_push (vec_oprndsk[i]);
3713 vargs.quick_push (vec_oprndsk[i + 1]);
3714 }
3715 gcall *call;
3716 if (ifn != IFN_LAST)
3717 call = gimple_build_call_internal_vec (ifn, vargs);
3718 else
3719 call = gimple_build_call_vec (fndecl, vargs);
3720 new_temp = make_ssa_name (vec_dest, call);
3721 gimple_call_set_lhs (call, new_temp);
3722 gimple_call_set_nothrow (call, true);
3723 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3724 slp_node->push_vec_def (call);
3725 }
3726 continue;
3727 }
3728
3729 for (i = 0; i < nargs; i++)
3730 {
3731 op = gimple_call_arg (stmt, i);
3732 if (j == 0)
3733 {
3734 vec_defs.quick_push (vNULL);
3735 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3736 op, &vec_defs[i], vectypes[i]);
3737 }
3738 vec_oprnd0 = vec_defs[i][2*j];
3739 vec_oprnd1 = vec_defs[i][2*j+1];
3740
3741 vargs.quick_push (vec_oprnd0);
3742 vargs.quick_push (vec_oprnd1);
3743 }
3744
3745 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3746 new_temp = make_ssa_name (vec_dest, new_stmt);
3747 gimple_call_set_lhs (new_stmt, new_temp);
3748 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3749
3750 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3751 }
3752
3753 if (!slp_node)
3754 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3755
3756 for (i = 0; i < nargs; i++)
3757 {
3758 vec<tree> vec_oprndsi = vec_defs[i];
3759 vec_oprndsi.release ();
3760 }
3761 }
3762 else
3763 /* No current target implements this case. */
3764 return false;
3765
3766 vargs.release ();
3767
3768 /* The call in STMT might prevent it from being removed in dce.
3769 We however cannot remove it here, due to the way the ssa name
3770 it defines is mapped to the new definition. So just replace the
3771 rhs of the statement with something harmless. */
3772
3773 if (slp_node)
3774 return true;
3775
3776 stmt_info = vect_orig_stmt (stmt_info);
3777 lhs = gimple_get_lhs (stmt_info->stmt);
3778
3779 gassign *new_stmt
3780 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3781 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3782
3783 return true;
3784 }
3785
3786
3787 struct simd_call_arg_info
3788 {
3789 tree vectype;
3790 tree op;
3791 HOST_WIDE_INT linear_step;
3792 enum vect_def_type dt;
3793 unsigned int align;
3794 bool simd_lane_linear;
3795 };
3796
3797 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3798 is linear within a simd lane (but not within the whole loop), note it in
3799 *ARGINFO. */
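/* As an illustrative sketch (the names are made up), given

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 4;
     op_6 = &arr + _2;

   OP advances by 4 per simd lane, so *ARGINFO records base &arr and
   linear_step 4.  */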
3800
3801 static void
3802 vect_simd_lane_linear (tree op, class loop *loop,
3803 struct simd_call_arg_info *arginfo)
3804 {
3805 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3806
3807 if (!is_gimple_assign (def_stmt)
3808 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3809 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3810 return;
3811
3812 tree base = gimple_assign_rhs1 (def_stmt);
3813 HOST_WIDE_INT linear_step = 0;
3814 tree v = gimple_assign_rhs2 (def_stmt);
3815 while (TREE_CODE (v) == SSA_NAME)
3816 {
3817 tree t;
3818 def_stmt = SSA_NAME_DEF_STMT (v);
3819 if (is_gimple_assign (def_stmt))
3820 switch (gimple_assign_rhs_code (def_stmt))
3821 {
3822 case PLUS_EXPR:
3823 t = gimple_assign_rhs2 (def_stmt);
3824 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3825 return;
3826 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3827 v = gimple_assign_rhs1 (def_stmt);
3828 continue;
3829 case MULT_EXPR:
3830 t = gimple_assign_rhs2 (def_stmt);
3831 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3832 return;
3833 linear_step = tree_to_shwi (t);
3834 v = gimple_assign_rhs1 (def_stmt);
3835 continue;
3836 CASE_CONVERT:
3837 t = gimple_assign_rhs1 (def_stmt);
3838 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3839 || (TYPE_PRECISION (TREE_TYPE (v))
3840 < TYPE_PRECISION (TREE_TYPE (t))))
3841 return;
3842 if (!linear_step)
3843 linear_step = 1;
3844 v = t;
3845 continue;
3846 default:
3847 return;
3848 }
3849 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3850 && loop->simduid
3851 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3852 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3853 == loop->simduid))
3854 {
3855 if (!linear_step)
3856 linear_step = 1;
3857 arginfo->linear_step = linear_step;
3858 arginfo->op = base;
3859 arginfo->simd_lane_linear = true;
3860 return;
3861 }
3862 }
3863 }
3864
3865 /* Function vectorizable_simd_clone_call.
3866
3867 Check if STMT_INFO performs a function call that can be vectorized
3868 by calling a simd clone of the function.
3869 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3870 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3871 Return true if STMT_INFO is vectorizable in this way. */
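/* For example, with a vectorization factor of 4 a scalar call foo (b[i])
   can be replaced by a single call to a simd clone of foo that takes and
   returns vectors of four elements; the best-matching registered clone
   is selected below.  */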
3872
3873 static bool
3874 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3875 gimple_stmt_iterator *gsi,
3876 gimple **vec_stmt, slp_tree slp_node,
3877 stmt_vector_for_cost *)
3878 {
3879 tree vec_dest;
3880 tree scalar_dest;
3881 tree op, type;
3882 tree vec_oprnd0 = NULL_TREE;
3883 tree vectype;
3884 poly_uint64 nunits;
3885 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3886 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3887 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3888 tree fndecl, new_temp;
3889 int ncopies, j;
3890 auto_vec<simd_call_arg_info> arginfo;
3891 vec<tree> vargs = vNULL;
3892 size_t i, nargs;
3893 tree lhs, rtype, ratype;
3894 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3895 int masked_call_offset = 0;
3896
3897 /* Is STMT a vectorizable call? */
3898 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3899 if (!stmt)
3900 return false;
3901
3902 fndecl = gimple_call_fndecl (stmt);
3903 if (fndecl == NULL_TREE
3904 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3905 {
3906 fndecl = gimple_call_arg (stmt, 0);
3907 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3908 fndecl = TREE_OPERAND (fndecl, 0);
3909 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3910 masked_call_offset = 1;
3911 }
3912 if (fndecl == NULL_TREE)
3913 return false;
3914
3915 struct cgraph_node *node = cgraph_node::get (fndecl);
3916 if (node == NULL || node->simd_clones == NULL)
3917 return false;
3918
3919 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3920 return false;
3921
3922 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3923 && ! vec_stmt)
3924 return false;
3925
3926 if (gimple_call_lhs (stmt)
3927 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3928 return false;
3929
3930 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3931
3932 vectype = STMT_VINFO_VECTYPE (stmt_info);
3933
3934 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3935 return false;
3936
3937 /* Process function arguments. */
3938 nargs = gimple_call_num_args (stmt) - masked_call_offset;
3939
3940 /* Bail out if the function has zero arguments. */
3941 if (nargs == 0)
3942 return false;
3943
3944 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3945 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3946 arginfo.reserve (nargs, true);
3947 auto_vec<slp_tree> slp_op;
3948 slp_op.safe_grow_cleared (nargs);
3949
3950 for (i = 0; i < nargs; i++)
3951 {
3952 simd_call_arg_info thisarginfo;
3953 affine_iv iv;
3954
3955 thisarginfo.linear_step = 0;
3956 thisarginfo.align = 0;
3957 thisarginfo.op = NULL_TREE;
3958 thisarginfo.simd_lane_linear = false;
3959
3960 int op_no = i + masked_call_offset;
3961 if (slp_node)
3962 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3963 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3964 op_no, &op, &slp_op[i],
3965 &thisarginfo.dt, &thisarginfo.vectype)
3966 || thisarginfo.dt == vect_uninitialized_def)
3967 {
3968 if (dump_enabled_p ())
3969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3970 "use not simple.\n");
3971 return false;
3972 }
3973
3974 if (thisarginfo.dt == vect_constant_def
3975 || thisarginfo.dt == vect_external_def)
3976 {
3977 /* With SLP we determine the vector type of constants/externals
3978 at analysis time, handling conflicts via
3979 vect_maybe_update_slp_op_vectype. At transform time
3980 we have a vector type recorded for SLP. */
3981 gcc_assert (!vec_stmt
3982 || !slp_node
3983 || thisarginfo.vectype != NULL_TREE);
3984 if (!vec_stmt)
3985 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3986 TREE_TYPE (op),
3987 slp_node);
3988 }
3989 else
3990 gcc_assert (thisarginfo.vectype != NULL_TREE);
3991
3992 /* For linear arguments, the analyze phase should have saved
3993 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3994 if (i * 3 + 4 <= simd_clone_info.length ()
3995 && simd_clone_info[i * 3 + 2])
3996 {
3997 gcc_assert (vec_stmt);
3998 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3999 thisarginfo.op = simd_clone_info[i * 3 + 1];
4000 thisarginfo.simd_lane_linear
4001 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4002 /* If the loop has been peeled for alignment, we need to adjust the linear base accordingly. */
4003 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4004 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4005 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4006 {
4007 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4008 tree step = simd_clone_info[i * 3 + 2];
4009 tree opt = TREE_TYPE (thisarginfo.op);
4010 bias = fold_convert (TREE_TYPE (step), bias);
4011 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4012 thisarginfo.op
4013 = fold_build2 (POINTER_TYPE_P (opt)
4014 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4015 thisarginfo.op, bias);
4016 }
4017 }
4018 else if (!vec_stmt
4019 && thisarginfo.dt != vect_constant_def
4020 && thisarginfo.dt != vect_external_def
4021 && loop_vinfo
4022 && TREE_CODE (op) == SSA_NAME
4023 && simple_iv (loop, loop_containing_stmt (stmt), op,
4024 &iv, false)
4025 && tree_fits_shwi_p (iv.step))
4026 {
4027 thisarginfo.linear_step = tree_to_shwi (iv.step);
4028 thisarginfo.op = iv.base;
4029 }
4030 else if ((thisarginfo.dt == vect_constant_def
4031 || thisarginfo.dt == vect_external_def)
4032 && POINTER_TYPE_P (TREE_TYPE (op)))
4033 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4034 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4035 linear too. */
4036 if (POINTER_TYPE_P (TREE_TYPE (op))
4037 && !thisarginfo.linear_step
4038 && !vec_stmt
4039 && thisarginfo.dt != vect_constant_def
4040 && thisarginfo.dt != vect_external_def
4041 && loop_vinfo
4042 && TREE_CODE (op) == SSA_NAME)
4043 vect_simd_lane_linear (op, loop, &thisarginfo);
4044
4045 arginfo.quick_push (thisarginfo);
4046 }
4047
4048 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4049 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4050 unsigned int badness = 0;
4051 struct cgraph_node *bestn = NULL;
4052 if (simd_clone_info.exists ())
4053 bestn = cgraph_node::get (simd_clone_info[0]);
4054 else
4055 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4056 n = n->simdclone->next_clone)
4057 {
4058 unsigned int this_badness = 0;
4059 unsigned int num_calls;
4060 /* The number of arguments in the call and the number of parameters in
4061 the simdclone should match. However, when the simdclone is
4062 'inbranch', it could have one more parameter than nargs when using
4063 an inbranch simdclone for a non-inbranch call, either in a
4064 non-masked loop using an all-true constant mask, or inside a masked
4065 loop using its mask. */
4066 size_t simd_nargs = n->simdclone->nargs;
4067 if (!masked_call_offset && n->simdclone->inbranch)
4068 simd_nargs--;
4069 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4070 &num_calls)
4071 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4072 || (nargs != simd_nargs))
4073 continue;
4074 if (num_calls != 1)
4075 this_badness += floor_log2 (num_calls) * 4096;
4076 if (n->simdclone->inbranch)
4077 this_badness += 8192;
4078 int target_badness = targetm.simd_clone.usable (n);
4079 if (target_badness < 0)
4080 continue;
4081 this_badness += target_badness * 512;
4082 for (i = 0; i < nargs; i++)
4083 {
4084 switch (n->simdclone->args[i].arg_type)
4085 {
4086 case SIMD_CLONE_ARG_TYPE_VECTOR:
4087 if (!useless_type_conversion_p
4088 (n->simdclone->args[i].orig_type,
4089 TREE_TYPE (gimple_call_arg (stmt,
4090 i + masked_call_offset))))
4091 i = -1;
4092 else if (arginfo[i].dt == vect_constant_def
4093 || arginfo[i].dt == vect_external_def
4094 || arginfo[i].linear_step)
4095 this_badness += 64;
4096 break;
4097 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4098 if (arginfo[i].dt != vect_constant_def
4099 && arginfo[i].dt != vect_external_def)
4100 i = -1;
4101 break;
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4103 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4104 if (arginfo[i].dt == vect_constant_def
4105 || arginfo[i].dt == vect_external_def
4106 || (arginfo[i].linear_step
4107 != n->simdclone->args[i].linear_step))
4108 i = -1;
4109 break;
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4112 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4113 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4114 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4115 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4116 /* FORNOW */
4117 i = -1;
4118 break;
4119 case SIMD_CLONE_ARG_TYPE_MASK:
4120 /* While we can create a traditional data vector from
4121 an incoming integer mode mask, we have no good way to
4122 force the generation of an integer mode mask from a
4123 traditional boolean vector input. */
4124 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4125 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4126 i = -1;
4127 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4128 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4129 this_badness += 2048;
4130 break;
4131 }
4132 if (i == (size_t) -1)
4133 break;
4134 if (n->simdclone->args[i].alignment > arginfo[i].align)
4135 {
4136 i = -1;
4137 break;
4138 }
4139 if (arginfo[i].align)
4140 this_badness += (exact_log2 (arginfo[i].align)
4141 - exact_log2 (n->simdclone->args[i].alignment));
4142 }
4143 if (i == (size_t) -1)
4144 continue;
4145 if (masked_call_offset == 0
4146 && n->simdclone->inbranch
4147 && n->simdclone->nargs > nargs)
4148 {
4149 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4150 SIMD_CLONE_ARG_TYPE_MASK);
4151 /* Penalize using a masked SIMD clone in a non-masked loop for a call
4152 that is not in a branch, as we'd have to construct an all-true mask. */
4153 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4154 this_badness += 64;
4155 }
4156 if (bestn == NULL || this_badness < badness)
4157 {
4158 bestn = n;
4159 badness = this_badness;
4160 }
4161 }
4162
4163 if (bestn == NULL)
4164 return false;
4165
4166 unsigned int num_mask_args = 0;
4167 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4168 for (i = 0; i < nargs; i++)
4169 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4170 num_mask_args++;
4171
4172 for (i = 0; i < nargs; i++)
4173 {
4174 if ((arginfo[i].dt == vect_constant_def
4175 || arginfo[i].dt == vect_external_def)
4176 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4177 {
4178 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4179 i + masked_call_offset));
4180 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4181 slp_node);
4182 if (arginfo[i].vectype == NULL
4183 || !constant_multiple_p (bestn->simdclone->simdlen,
4184 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4185 return false;
4186 }
4187
4188 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4189 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4190 {
4191 if (dump_enabled_p ())
4192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4193 "vector mask arguments are not supported.\n");
4194 return false;
4195 }
4196
4197 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4198 {
4199 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4200 if (bestn->simdclone->mask_mode == VOIDmode)
4201 {
4202 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4203 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4204 {
4205 /* FORNOW we only have partial support for vector-type masks
4206 that can't hold all of simdlen. */
4207 if (dump_enabled_p ())
4208 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4209 vect_location,
4210 "in-branch vector clones are not yet"
4211 " supported for mismatched vector sizes.\n");
4212 return false;
4213 }
4214 if (!expand_vec_cond_expr_p (clone_arg_vectype,
4215 arginfo[i].vectype, ERROR_MARK))
4216 {
4217 if (dump_enabled_p ())
4218 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4219 vect_location,
4220 "cannot compute mask argument for"
4221 " in-branch vector clones.\n");
4222 return false;
4223 }
4224 }
4225 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4226 {
4227 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4228 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4229 num_mask_args),
4230 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4231 {
4232 /* FORNOW we only have partial support for integer-type masks
4233 that represent the same number of lanes as the
4234 vectorized mask inputs. */
4235 if (dump_enabled_p ())
4236 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4237 vect_location,
4238 "in-branch vector clones are not yet "
4239 "supported for mismatched vector sizes.\n");
4240 return false;
4241 }
4242 }
4243 else
4244 {
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4247 vect_location,
4248 "in-branch vector clones not supported"
4249 " on this target.\n");
4250 return false;
4251 }
4252 }
4253 }
4254
4255 fndecl = bestn->decl;
4256 nunits = bestn->simdclone->simdlen;
4257 if (slp_node)
4258 ncopies = vector_unroll_factor (vf * group_size, nunits);
4259 else
4260 ncopies = vector_unroll_factor (vf, nunits);
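 /* E.g. with a vectorization factor of 8 and a clone simdlen of 4,
    ncopies is 2, i.e. the clone is called twice per vectorized
    iteration.  */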
4261
4262 /* If the function isn't const, only allow it in simd loops where the
4263 user has asserted that at least nunits consecutive iterations can be
4264 performed using SIMD instructions. */
4265 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4266 && gimple_vuse (stmt))
4267 return false;
4268
4269 /* Sanity check: make sure that at least one copy of the vectorized stmt
4270 needs to be generated. */
4271 gcc_assert (ncopies >= 1);
4272
4273 if (!vec_stmt) /* transformation not required. */
4274 {
4275 if (slp_node)
4276 for (unsigned i = 0; i < nargs; ++i)
4277 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4278 {
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4281 "incompatible vector types for invariants\n");
4282 return false;
4283 }
4284 /* When the original call is pure or const but the SIMD ABI dictates
4285 an aggregate return we will have to use a virtual definition and
4286 in a loop eventually even need to add a virtual PHI. That's
4287 not straightforward, so allow this to be fixed up via renaming. */
4288 if (gimple_call_lhs (stmt)
4289 && !gimple_vdef (stmt)
4290 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4291 vinfo->any_known_not_updated_vssa = true;
4292 /* ??? For SLP code-gen we end up inserting after the last
4293 vector argument def rather than at the original call position
4294 so automagic virtual operand updating doesn't work. */
4295 if (gimple_vuse (stmt) && slp_node)
4296 vinfo->any_known_not_updated_vssa = true;
4297 simd_clone_info.safe_push (bestn->decl);
4298 for (i = 0; i < bestn->simdclone->nargs; i++)
4299 {
4300 switch (bestn->simdclone->args[i].arg_type)
4301 {
4302 default:
4303 continue;
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4305 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4306 {
4307 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4308 simd_clone_info.safe_push (arginfo[i].op);
4309 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4310 ? size_type_node : TREE_TYPE (arginfo[i].op);
4311 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4312 simd_clone_info.safe_push (ls);
4313 tree sll = arginfo[i].simd_lane_linear
4314 ? boolean_true_node : boolean_false_node;
4315 simd_clone_info.safe_push (sll);
4316 }
4317 break;
4318 case SIMD_CLONE_ARG_TYPE_MASK:
4319 if (loop_vinfo
4320 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4321 vect_record_loop_mask (loop_vinfo,
4322 &LOOP_VINFO_MASKS (loop_vinfo),
4323 ncopies, vectype, op);
4324
4325 break;
4326 }
4327 }
4328
4329 if (!bestn->simdclone->inbranch && loop_vinfo)
4330 {
4331 if (dump_enabled_p ()
4332 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4333 dump_printf_loc (MSG_NOTE, vect_location,
4334 "can't use a fully-masked loop because a"
4335 " non-masked simd clone was selected.\n");
4336 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4337 }
4338
4339 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4340 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4341 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4342 dt, slp_node, cost_vec); */
4343 return true;
4344 }
4345
4346 /* Transform. */
4347
4348 if (dump_enabled_p ())
4349 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4350
4351 /* Handle def. */
4352 scalar_dest = gimple_call_lhs (stmt);
4353 vec_dest = NULL_TREE;
4354 rtype = NULL_TREE;
4355 ratype = NULL_TREE;
4356 if (scalar_dest)
4357 {
4358 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4359 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4360 if (TREE_CODE (rtype) == ARRAY_TYPE)
4361 {
4362 ratype = rtype;
4363 rtype = TREE_TYPE (ratype);
4364 }
4365 }
4366
4367 auto_vec<vec<tree> > vec_oprnds;
4368 auto_vec<unsigned> vec_oprnds_i;
4369 vec_oprnds_i.safe_grow_cleared (nargs, true);
4370 if (slp_node)
4371 {
4372 vec_oprnds.reserve_exact (nargs);
4373 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4374 }
4375 else
4376 vec_oprnds.safe_grow_cleared (nargs, true);
4377 for (j = 0; j < ncopies; ++j)
4378 {
4379 poly_uint64 callee_nelements;
4380 poly_uint64 caller_nelements;
4381 /* Build argument list for the vectorized call. */
4382 if (j == 0)
4383 vargs.create (nargs);
4384 else
4385 vargs.truncate (0);
4386
4387 for (i = 0; i < nargs; i++)
4388 {
4389 unsigned int k, l, m, o;
4390 tree atype;
4391 op = gimple_call_arg (stmt, i + masked_call_offset);
4392 switch (bestn->simdclone->args[i].arg_type)
4393 {
4394 case SIMD_CLONE_ARG_TYPE_VECTOR:
4395 atype = bestn->simdclone->args[i].vector_type;
4396 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4397 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4398 o = vector_unroll_factor (nunits, callee_nelements);
4399 for (m = j * o; m < (j + 1) * o; m++)
4400 {
4401 if (known_lt (callee_nelements, caller_nelements))
4402 {
4403 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4404 if (!constant_multiple_p (caller_nelements,
4405 callee_nelements, &k))
4406 gcc_unreachable ();
4407
4408 gcc_assert ((k & (k - 1)) == 0);
4409 if (m == 0)
4410 {
4411 if (!slp_node)
4412 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4413 ncopies * o / k, op,
4414 &vec_oprnds[i]);
4415 vec_oprnds_i[i] = 0;
4416 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4417 }
4418 else
4419 {
4420 vec_oprnd0 = arginfo[i].op;
4421 if ((m & (k - 1)) == 0)
4422 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4423 }
4424 arginfo[i].op = vec_oprnd0;
4425 vec_oprnd0
4426 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4427 bitsize_int (prec),
4428 bitsize_int ((m & (k - 1)) * prec));
4429 gassign *new_stmt
4430 = gimple_build_assign (make_ssa_name (atype),
4431 vec_oprnd0);
4432 vect_finish_stmt_generation (vinfo, stmt_info,
4433 new_stmt, gsi);
4434 vargs.safe_push (gimple_assign_lhs (new_stmt));
4435 }
4436 else
4437 {
4438 if (!constant_multiple_p (callee_nelements,
4439 caller_nelements, &k))
4440 gcc_unreachable ();
4441 gcc_assert ((k & (k - 1)) == 0);
4442 vec<constructor_elt, va_gc> *ctor_elts;
4443 if (k != 1)
4444 vec_alloc (ctor_elts, k);
4445 else
4446 ctor_elts = NULL;
4447 for (l = 0; l < k; l++)
4448 {
4449 if (m == 0 && l == 0)
4450 {
4451 if (!slp_node)
4452 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4453 k * o * ncopies,
4454 op,
4455 &vec_oprnds[i]);
4456 vec_oprnds_i[i] = 0;
4457 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4458 }
4459 else
4460 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4461 arginfo[i].op = vec_oprnd0;
4462 if (k == 1)
4463 break;
4464 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4465 vec_oprnd0);
4466 }
4467 if (k == 1)
4468 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4469 atype))
4470 {
4471 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4472 vec_oprnd0);
4473 gassign *new_stmt
4474 = gimple_build_assign (make_ssa_name (atype),
4475 vec_oprnd0);
4476 vect_finish_stmt_generation (vinfo, stmt_info,
4477 new_stmt, gsi);
4478 vargs.safe_push (gimple_get_lhs (new_stmt));
4479 }
4480 else
4481 vargs.safe_push (vec_oprnd0);
4482 else
4483 {
4484 vec_oprnd0 = build_constructor (atype, ctor_elts);
4485 gassign *new_stmt
4486 = gimple_build_assign (make_ssa_name (atype),
4487 vec_oprnd0);
4488 vect_finish_stmt_generation (vinfo, stmt_info,
4489 new_stmt, gsi);
4490 vargs.safe_push (gimple_assign_lhs (new_stmt));
4491 }
4492 }
4493 }
4494 break;
4495 case SIMD_CLONE_ARG_TYPE_MASK:
4496 if (bestn->simdclone->mask_mode == VOIDmode)
4497 {
4498 atype = bestn->simdclone->args[i].vector_type;
4499 tree elt_type = TREE_TYPE (atype);
4500 tree one = fold_convert (elt_type, integer_one_node);
4501 tree zero = fold_convert (elt_type, integer_zero_node);
4502 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4503 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4504 o = vector_unroll_factor (nunits, callee_nelements);
4505 for (m = j * o; m < (j + 1) * o; m++)
4506 {
4507 if (maybe_lt (callee_nelements, caller_nelements))
4508 {
4509 /* The mask type has fewer elements than simdlen. */
4510
4511 /* FORNOW */
4512 gcc_unreachable ();
4513 }
4514 else if (known_eq (callee_nelements, caller_nelements))
4515 {
4516 /* The SIMD clone function has the same number of
4517 elements as the current function. */
4518 if (m == 0)
4519 {
4520 if (!slp_node)
4521 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4522 o * ncopies,
4523 op,
4524 &vec_oprnds[i]);
4525 vec_oprnds_i[i] = 0;
4526 }
4527 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4528 if (loop_vinfo
4529 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4530 {
4531 vec_loop_masks *loop_masks
4532 = &LOOP_VINFO_MASKS (loop_vinfo);
4533 tree loop_mask
4534 = vect_get_loop_mask (loop_vinfo, gsi,
4535 loop_masks, ncopies,
4536 vectype, j);
4537 vec_oprnd0
4538 = prepare_vec_mask (loop_vinfo,
4539 TREE_TYPE (loop_mask),
4540 loop_mask, vec_oprnd0,
4541 gsi);
4542 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4543 loop_mask });
4544
4545 }
4546 vec_oprnd0
4547 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4548 build_vector_from_val (atype, one),
4549 build_vector_from_val (atype, zero));
4550 gassign *new_stmt
4551 = gimple_build_assign (make_ssa_name (atype),
4552 vec_oprnd0);
4553 vect_finish_stmt_generation (vinfo, stmt_info,
4554 new_stmt, gsi);
4555 vargs.safe_push (gimple_assign_lhs (new_stmt));
4556 }
4557 else
4558 {
4559 /* The mask type has more elements than simdlen. */
4560
4561 /* FORNOW */
4562 gcc_unreachable ();
4563 }
4564 }
4565 }
4566 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4567 {
4568 atype = bestn->simdclone->args[i].vector_type;
4569 /* Guess the number of lanes represented by atype. */
4570 poly_uint64 atype_subparts
4571 = exact_div (bestn->simdclone->simdlen,
4572 num_mask_args);
4573 o = vector_unroll_factor (nunits, atype_subparts);
4574 for (m = j * o; m < (j + 1) * o; m++)
4575 {
4576 if (m == 0)
4577 {
4578 if (!slp_node)
4579 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4580 o * ncopies,
4581 op,
4582 &vec_oprnds[i]);
4583 vec_oprnds_i[i] = 0;
4584 }
4585 if (maybe_lt (atype_subparts,
4586 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4587 {
4588 /* The mask argument has fewer elements than the
4589 input vector. */
4590 /* FORNOW */
4591 gcc_unreachable ();
4592 }
4593 else if (known_eq (atype_subparts,
4594 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4595 {
4596 /* The vector mask argument matches the input
4597 in the number of lanes, but not necessarily
4598 in the mode. */
4599 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4600 tree st = lang_hooks.types.type_for_mode
4601 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4602 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4603 vec_oprnd0);
4604 gassign *new_stmt
4605 = gimple_build_assign (make_ssa_name (st),
4606 vec_oprnd0);
4607 vect_finish_stmt_generation (vinfo, stmt_info,
4608 new_stmt, gsi);
4609 if (!types_compatible_p (atype, st))
4610 {
4611 new_stmt
4612 = gimple_build_assign (make_ssa_name (atype),
4613 NOP_EXPR,
4614 gimple_assign_lhs
4615 (new_stmt));
4616 vect_finish_stmt_generation (vinfo, stmt_info,
4617 new_stmt, gsi);
4618 }
4619 vargs.safe_push (gimple_assign_lhs (new_stmt));
4620 }
4621 else
4622 {
4623 /* The mask argument has more elements than the
4624 input vector. */
4625 /* FORNOW */
4626 gcc_unreachable ();
4627 }
4628 }
4629 }
4630 else
4631 gcc_unreachable ();
4632 break;
4633 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4634 vargs.safe_push (op);
4635 break;
4636 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4637 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4638 if (j == 0)
4639 {
4640 gimple_seq stmts;
4641 arginfo[i].op
4642 = force_gimple_operand (unshare_expr (arginfo[i].op),
4643 &stmts, true, NULL_TREE);
4644 if (stmts != NULL)
4645 {
4646 basic_block new_bb;
4647 edge pe = loop_preheader_edge (loop);
4648 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4649 gcc_assert (!new_bb);
4650 }
4651 if (arginfo[i].simd_lane_linear)
4652 {
4653 vargs.safe_push (arginfo[i].op);
4654 break;
4655 }
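 /* Otherwise materialize the linear argument as an induction
    variable: it starts at the (possibly bias-adjusted) base in the
    loop preheader and is advanced by linear_step * simdlen * ncopies
    on the latch edge.  */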
4656 tree phi_res = copy_ssa_name (op);
4657 gphi *new_phi = create_phi_node (phi_res, loop->header);
4658 add_phi_arg (new_phi, arginfo[i].op,
4659 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4660 enum tree_code code
4661 = POINTER_TYPE_P (TREE_TYPE (op))
4662 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4663 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4664 ? sizetype : TREE_TYPE (op);
4665 poly_widest_int cst
4666 = wi::mul (bestn->simdclone->args[i].linear_step,
4667 ncopies * nunits);
4668 tree tcst = wide_int_to_tree (type, cst);
4669 tree phi_arg = copy_ssa_name (op);
4670 gassign *new_stmt
4671 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4672 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4673 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4674 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4675 UNKNOWN_LOCATION);
4676 arginfo[i].op = phi_res;
4677 vargs.safe_push (phi_res);
4678 }
4679 else
4680 {
4681 enum tree_code code
4682 = POINTER_TYPE_P (TREE_TYPE (op))
4683 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4684 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4685 ? sizetype : TREE_TYPE (op);
4686 poly_widest_int cst
4687 = wi::mul (bestn->simdclone->args[i].linear_step,
4688 j * nunits);
4689 tree tcst = wide_int_to_tree (type, cst);
4690 new_temp = make_ssa_name (TREE_TYPE (op));
4691 gassign *new_stmt
4692 = gimple_build_assign (new_temp, code,
4693 arginfo[i].op, tcst);
4694 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4695 vargs.safe_push (new_temp);
4696 }
4697 break;
4698 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4699 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4700 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4701 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4702 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4703 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4704 default:
4705 gcc_unreachable ();
4706 }
4707 }
4708
4709 if (masked_call_offset == 0
4710 && bestn->simdclone->inbranch
4711 && bestn->simdclone->nargs > nargs)
4712 {
4713 unsigned long m, o;
4714 size_t mask_i = bestn->simdclone->nargs - 1;
4715 tree mask;
4716 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4717 SIMD_CLONE_ARG_TYPE_MASK);
4718
4719 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4720 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4721 o = vector_unroll_factor (nunits, callee_nelements);
4722 for (m = j * o; m < (j + 1) * o; m++)
4723 {
4724 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4725 {
4726 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4727 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4728 ncopies, vectype, j);
4729 }
4730 else
4731 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4732
4733 gassign *new_stmt;
4734 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4735 {
4736 /* This means we are dealing with integer mask modes.
4737 First convert to an integer type with the same size as
4738 the current vector type. */
4739 unsigned HOST_WIDE_INT intermediate_size
4740 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4741 tree mid_int_type =
4742 build_nonstandard_integer_type (intermediate_size, 1);
4743 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4744 new_stmt
4745 = gimple_build_assign (make_ssa_name (mid_int_type),
4746 mask);
4747 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4748 /* Then zero-extend to the mask mode. */
4749 mask = fold_build1 (NOP_EXPR, masktype,
4750 gimple_get_lhs (new_stmt));
4751 }
4752 else if (bestn->simdclone->mask_mode == VOIDmode)
4753 {
4754 tree one = fold_convert (TREE_TYPE (masktype),
4755 integer_one_node);
4756 tree zero = fold_convert (TREE_TYPE (masktype),
4757 integer_zero_node);
4758 mask = build3 (VEC_COND_EXPR, masktype, mask,
4759 build_vector_from_val (masktype, one),
4760 build_vector_from_val (masktype, zero));
4761 }
4762 else
4763 gcc_unreachable ();
4764
4765 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4766 vect_finish_stmt_generation (vinfo, stmt_info,
4767 new_stmt, gsi);
4768 mask = gimple_assign_lhs (new_stmt);
4769 vargs.safe_push (mask);
4770 }
4771 }
4772
4773 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4774 if (vec_dest)
4775 {
4776 gcc_assert (ratype
4777 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4778 if (ratype)
4779 new_temp = create_tmp_var (ratype);
4780 else if (useless_type_conversion_p (vectype, rtype))
4781 new_temp = make_ssa_name (vec_dest, new_call);
4782 else
4783 new_temp = make_ssa_name (rtype, new_call);
4784 gimple_call_set_lhs (new_call, new_temp);
4785 }
4786 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4787 gimple *new_stmt = new_call;
4788
4789 if (vec_dest)
4790 {
4791 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4792 {
4793 unsigned int k, l;
4794 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4795 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4796 k = vector_unroll_factor (nunits,
4797 TYPE_VECTOR_SUBPARTS (vectype));
4798 gcc_assert ((k & (k - 1)) == 0);
4799 for (l = 0; l < k; l++)
4800 {
4801 tree t;
4802 if (ratype)
4803 {
4804 t = build_fold_addr_expr (new_temp);
4805 t = build2 (MEM_REF, vectype, t,
4806 build_int_cst (TREE_TYPE (t), l * bytes));
4807 }
4808 else
4809 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4810 bitsize_int (prec), bitsize_int (l * prec));
4811 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4812 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4813
4814 if (j == 0 && l == 0)
4815 *vec_stmt = new_stmt;
4816 if (slp_node)
4817 SLP_TREE_VEC_DEFS (slp_node)
4818 .quick_push (gimple_assign_lhs (new_stmt));
4819 else
4820 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4821 }
4822
4823 if (ratype)
4824 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4825 continue;
4826 }
4827 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4828 {
4829 unsigned int k;
4830 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4831 TYPE_VECTOR_SUBPARTS (rtype), &k))
4832 gcc_unreachable ();
4833 gcc_assert ((k & (k - 1)) == 0);
4834 if ((j & (k - 1)) == 0)
4835 vec_alloc (ret_ctor_elts, k);
4836 if (ratype)
4837 {
4838 unsigned int m, o;
4839 o = vector_unroll_factor (nunits,
4840 TYPE_VECTOR_SUBPARTS (rtype));
4841 for (m = 0; m < o; m++)
4842 {
4843 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4844 size_int (m), NULL_TREE, NULL_TREE);
4845 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4846 tem);
4847 vect_finish_stmt_generation (vinfo, stmt_info,
4848 new_stmt, gsi);
4849 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4850 gimple_assign_lhs (new_stmt));
4851 }
4852 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4853 }
4854 else
4855 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4856 if ((j & (k - 1)) != k - 1)
4857 continue;
4858 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4859 new_stmt
4860 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4861 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4862
4863 if ((unsigned) j == k - 1)
4864 *vec_stmt = new_stmt;
4865 if (slp_node)
4866 SLP_TREE_VEC_DEFS (slp_node)
4867 .quick_push (gimple_assign_lhs (new_stmt));
4868 else
4869 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4870 continue;
4871 }
4872 else if (ratype)
4873 {
4874 tree t = build_fold_addr_expr (new_temp);
4875 t = build2 (MEM_REF, vectype, t,
4876 build_int_cst (TREE_TYPE (t), 0));
4877 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4878 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4879 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4880 }
4881 else if (!useless_type_conversion_p (vectype, rtype))
4882 {
4883 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4884 new_stmt
4885 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4886 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4887 }
4888 }
4889
4890 if (j == 0)
4891 *vec_stmt = new_stmt;
4892 if (slp_node)
4893 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4894 else
4895 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4896 }
4897
4898 for (i = 0; i < nargs; ++i)
4899 {
4900 vec<tree> oprndsi = vec_oprnds[i];
4901 oprndsi.release ();
4902 }
4903 vargs.release ();
4904
4905 /* Mark the clone as no longer being a candidate for GC. */
4906 bestn->gc_candidate = false;
4907
4908 /* The call in STMT might prevent it from being removed in dce.
4909 We however cannot remove it here, due to the way the ssa name
4910 it defines is mapped to the new definition. So just replace the
4911 rhs of the statement with something harmless. */
4912
4913 if (slp_node)
4914 return true;
4915
4916 gimple *new_stmt;
4917 if (scalar_dest)
4918 {
4919 type = TREE_TYPE (scalar_dest);
4920 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4921 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4922 }
4923 else
4924 new_stmt = gimple_build_nop ();
4925 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4926 unlink_stmt_vdef (stmt);
4927
4928 return true;
4929 }
4930
4931
4932 /* Function vect_gen_widened_results_half
4933
4934 Create a vector stmt whose code, number of operands, and result
4935 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
4936 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4937 If CH represents a function rather than a tree code, a call to that
4938 function is created instead of an assignment.
4939 STMT_INFO is the original scalar stmt that we are vectorizing. */
4940
4941 static gimple *
4942 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4943 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4944 tree vec_dest, gimple_stmt_iterator *gsi,
4945 stmt_vec_info stmt_info)
4946 {
4947 gimple *new_stmt;
4948 tree new_temp;
4949
4950 /* Generate half of the widened result: */
4951 if (op_type != binary_op)
4952 vec_oprnd1 = NULL;
4953 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4954 new_temp = make_ssa_name (vec_dest, new_stmt);
4955 gimple_set_lhs (new_stmt, new_temp);
4956 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4957
4958 return new_stmt;
4959 }
4960
4961
4962 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4963 For multi-step conversions store the resulting vectors and call the function
4964 recursively. When NARROW_SRC_P is true, there is still a conversion after
4965 the narrowing, so don't store the vectors in the SLP_NODE or in the vector
4966 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
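/* As an illustrative example, a single demotion step packs each pair of
   input vectors (e.g. with VEC_PACK_TRUNC_EXPR two V8SI operands yield one
   V16HI result); for multi-step conversions the packing is then repeated
   on the intermediate vectors.  */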
4967
4968 static void
4969 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4970 int multi_step_cvt,
4971 stmt_vec_info stmt_info,
4972 vec<tree> &vec_dsts,
4973 gimple_stmt_iterator *gsi,
4974 slp_tree slp_node, code_helper code,
4975 bool narrow_src_p)
4976 {
4977 unsigned int i;
4978 tree vop0, vop1, new_tmp, vec_dest;
4979
4980 vec_dest = vec_dsts.pop ();
4981
4982 for (i = 0; i < vec_oprnds->length (); i += 2)
4983 {
4984 /* Create demotion operation. */
4985 vop0 = (*vec_oprnds)[i];
4986 vop1 = (*vec_oprnds)[i + 1];
4987 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4988 new_tmp = make_ssa_name (vec_dest, new_stmt);
4989 gimple_set_lhs (new_stmt, new_tmp);
4990 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4991 if (multi_step_cvt || narrow_src_p)
4992 /* Store the resulting vector for the next recursive call,
4993 or return the resulting vector_tmp for a NARROW FLOAT_EXPR. */
4994 (*vec_oprnds)[i/2] = new_tmp;
4995 else
4996 {
4997 /* This is the last step of the conversion sequence. Store the
4998 vectors in SLP_NODE or in vector info of the scalar statement
4999 (or in STMT_VINFO_RELATED_STMT chain). */
5000 if (slp_node)
5001 slp_node->push_vec_def (new_stmt);
5002 else
5003 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5004 }
5005 }
5006
5007 /* For multi-step demotion operations we first generate demotion operations
5008 from the source type to the intermediate types, and then combine the
5009 results (stored in VEC_OPRNDS) in demotion operation to the destination
5010 type. */
5011 if (multi_step_cvt)
5012 {
5013 /* At each level of recursion we have half of the operands we had at the
5014 previous level. */
5015 vec_oprnds->truncate ((i+1)/2);
5016 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5017 multi_step_cvt - 1,
5018 stmt_info, vec_dsts, gsi,
5019 slp_node, VEC_PACK_TRUNC_EXPR,
5020 narrow_src_p);
5021 }
5022
5023 vec_dsts.quick_push (vec_dest);
5024 }
5025
5026
5027 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5028 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5029 STMT_INFO. For multi-step conversions store the resulting vectors and
5030 call the function recursively. */
5031
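/* Illustrative note, assuming 128-bit vectors: each input vector yields
   two result vectors, the LO and HI halves, e.g. for a HImode -> SImode
   promotion

       lo0 = VEC_UNPACK_LO_EXPR <v0>;
       hi0 = VEC_UNPACK_HI_EXPR <v0>;

   so VEC_OPRNDS0 doubles in length at every promotion level.  */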
5032 static void
5033 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5034 vec<tree> *vec_oprnds0,
5035 vec<tree> *vec_oprnds1,
5036 stmt_vec_info stmt_info, tree vec_dest,
5037 gimple_stmt_iterator *gsi,
5038 code_helper ch1,
5039 code_helper ch2, int op_type)
5040 {
5041 int i;
5042 tree vop0, vop1, new_tmp1, new_tmp2;
5043 gimple *new_stmt1, *new_stmt2;
5044 vec<tree> vec_tmp = vNULL;
5045
5046 vec_tmp.create (vec_oprnds0->length () * 2);
5047 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5048 {
5049 if (op_type == binary_op)
5050 vop1 = (*vec_oprnds1)[i];
5051 else
5052 vop1 = NULL_TREE;
5053
5054 /* Generate the two halves of promotion operation. */
5055 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5056 op_type, vec_dest, gsi,
5057 stmt_info);
5058 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5059 op_type, vec_dest, gsi,
5060 stmt_info);
5061 if (is_gimple_call (new_stmt1))
5062 {
5063 new_tmp1 = gimple_call_lhs (new_stmt1);
5064 new_tmp2 = gimple_call_lhs (new_stmt2);
5065 }
5066 else
5067 {
5068 new_tmp1 = gimple_assign_lhs (new_stmt1);
5069 new_tmp2 = gimple_assign_lhs (new_stmt2);
5070 }
5071
5072 /* Store the results for the next step. */
5073 vec_tmp.quick_push (new_tmp1);
5074 vec_tmp.quick_push (new_tmp2);
5075 }
5076
5077 vec_oprnds0->release ();
5078 *vec_oprnds0 = vec_tmp;
5079 }
5080
5081 /* Create vectorized promotion stmts for widening stmts using only half the
5082 potential vector size for input. */
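/* Illustrative sketch: for

       void
       f (int *a, int *b, long long *c, int n)
       {
         for (int i = 0; i < n; i++)
           c[i] = (long long) a[i] + (long long) b[i];
       }

   when the chosen input vector type has as many lanes as the output type
   (say V2SI -> V2DI), each vector input is first widened with a plain
   NOP_EXPR conversion and the operation is then applied to the widened
   vectors, instead of going through an unpack-based promotion.  */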
5083 static void
5084 vect_create_half_widening_stmts (vec_info *vinfo,
5085 vec<tree> *vec_oprnds0,
5086 vec<tree> *vec_oprnds1,
5087 stmt_vec_info stmt_info, tree vec_dest,
5088 gimple_stmt_iterator *gsi,
5089 code_helper code1,
5090 int op_type)
5091 {
5092 int i;
5093 tree vop0, vop1;
5094 gimple *new_stmt1;
5095 gimple *new_stmt2;
5096 gimple *new_stmt3;
5097 vec<tree> vec_tmp = vNULL;
5098
5099 vec_tmp.create (vec_oprnds0->length ());
5100 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5101 {
5102 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5103
5104 gcc_assert (op_type == binary_op);
5105 vop1 = (*vec_oprnds1)[i];
5106
5107 /* Widen the first vector input. */
5108 out_type = TREE_TYPE (vec_dest);
5109 new_tmp1 = make_ssa_name (out_type);
5110 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5111 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5112 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5113 {
5114 /* Widen the second vector input. */
5115 new_tmp2 = make_ssa_name (out_type);
5116 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5118 /* Perform the operation with both vector inputs widened. */
5119 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5120 }
5121 else
5122 {
5123 /* Perform the operation with the single vector input widened. */
5124 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5125 }
5126
5127 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5128 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5129 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5130
5131 /* Store the results for the next step. */
5132 vec_tmp.quick_push (new_tmp3);
5133 }
5134
5135 vec_oprnds0->release ();
5136 *vec_oprnds0 = vec_tmp;
5137 }
5138
5139
5140 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5141 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5142 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5143 Return true if STMT_INFO is vectorizable in this way. */
5144
5145 static bool
5146 vectorizable_conversion (vec_info *vinfo,
5147 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5148 gimple **vec_stmt, slp_tree slp_node,
5149 stmt_vector_for_cost *cost_vec)
5150 {
5151 tree vec_dest, cvt_op = NULL_TREE;
5152 tree scalar_dest;
5153 tree op0, op1 = NULL_TREE;
5154 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5155 tree_code tc1, tc2;
5156 code_helper code, code1, code2;
5157 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5158 tree new_temp;
5159 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5160 int ndts = 2;
5161 poly_uint64 nunits_in;
5162 poly_uint64 nunits_out;
5163 tree vectype_out, vectype_in;
5164 int ncopies, i;
5165 tree lhs_type, rhs_type;
5166 /* For conversions between floating point and integer there are two NARROW
5167 cases. NARROW_SRC is for FLOAT_EXPR and means
5168 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5169 This is safe only when the range of the source integer fits into the
5170 lower precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5171 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5172 For all other narrowing conversions NARROW_DST is used as the
5173 default. */
5174 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
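  /* Illustrative NARROW_SRC example, assuming the range of the source is
     known to fit in a narrower integer: in

         void
         f (long *a, float *b, int n)
         {
           for (int i = 0; i < n; i++)
             b[i] = (float) a[i];
         }

     two DImode vectors whose values fit in SImode can first be packed into
     one SImode vector (the DEMOTION step) and then converted with a single
     FLOAT_EXPR; the reverse order, FIX_TRUNC_EXPR followed by the packing,
     is the NARROW_DST shape.  */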
5175 vec<tree> vec_oprnds0 = vNULL;
5176 vec<tree> vec_oprnds1 = vNULL;
5177 tree vop0;
5178 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5179 int multi_step_cvt = 0;
5180 vec<tree> interm_types = vNULL;
5181 tree intermediate_type, cvt_type = NULL_TREE;
5182 int op_type;
5183 unsigned short fltsz;
5184
5185 /* Is STMT a vectorizable conversion? */
5186
5187 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5188 return false;
5189
5190 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5191 && ! vec_stmt)
5192 return false;
5193
5194 gimple* stmt = stmt_info->stmt;
5195 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5196 return false;
5197
5198 if (gimple_get_lhs (stmt) == NULL_TREE
5199 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5200 return false;
5201
5202 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5203 return false;
5204
5205 if (is_gimple_assign (stmt))
5206 {
5207 code = gimple_assign_rhs_code (stmt);
5208 op_type = TREE_CODE_LENGTH ((tree_code) code);
5209 }
5210 else if (gimple_call_internal_p (stmt))
5211 {
5212 code = gimple_call_internal_fn (stmt);
5213 op_type = gimple_call_num_args (stmt);
5214 }
5215 else
5216 return false;
5217
5218 bool widen_arith = (code == WIDEN_MULT_EXPR
5219 || code == WIDEN_LSHIFT_EXPR
5220 || widening_fn_p (code));
5221
5222 if (!widen_arith
5223 && !CONVERT_EXPR_CODE_P (code)
5224 && code != FIX_TRUNC_EXPR
5225 && code != FLOAT_EXPR)
5226 return false;
5227
5228 /* Check types of lhs and rhs. */
5229 scalar_dest = gimple_get_lhs (stmt);
5230 lhs_type = TREE_TYPE (scalar_dest);
5231 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5232
5233 /* Check the operands of the operation. */
5234 slp_tree slp_op0, slp_op1 = NULL;
5235 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5236 0, &op0, &slp_op0, &dt[0], &vectype_in))
5237 {
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5240 "use not simple.\n");
5241 return false;
5242 }
5243
5244 rhs_type = TREE_TYPE (op0);
5245 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5246 && !((INTEGRAL_TYPE_P (lhs_type)
5247 && INTEGRAL_TYPE_P (rhs_type))
5248 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5249 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5250 return false;
5251
5252 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5253 && ((INTEGRAL_TYPE_P (lhs_type)
5254 && !type_has_mode_precision_p (lhs_type))
5255 || (INTEGRAL_TYPE_P (rhs_type)
5256 && !type_has_mode_precision_p (rhs_type))))
5257 {
5258 if (dump_enabled_p ())
5259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5260 "type conversion to/from bit-precision unsupported."
5261 "\n");
5262 return false;
5263 }
5264
5265 if (op_type == binary_op)
5266 {
5267 gcc_assert (code == WIDEN_MULT_EXPR
5268 || code == WIDEN_LSHIFT_EXPR
5269 || widening_fn_p (code));
5270
5271 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5272 gimple_call_arg (stmt, 0);
5273 tree vectype1_in;
5274 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5275 &op1, &slp_op1, &dt[1], &vectype1_in))
5276 {
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5279 "use not simple.\n");
5280 return false;
5281 }
5282 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5283 OP1. */
5284 if (!vectype_in)
5285 vectype_in = vectype1_in;
5286 }
5287
5288 /* If op0 is an external or constant def, infer the vector type
5289 from the scalar type. */
5290 if (!vectype_in)
5291 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5292 if (vec_stmt)
5293 gcc_assert (vectype_in);
5294 if (!vectype_in)
5295 {
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5298 "no vectype for scalar type %T\n", rhs_type);
5299
5300 return false;
5301 }
5302
5303 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5304 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5305 {
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308 "can't convert between boolean and non "
5309 "boolean vectors %T\n", rhs_type);
5310
5311 return false;
5312 }
5313
5314 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5315 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5316 if (known_eq (nunits_out, nunits_in))
5317 if (widen_arith)
5318 modifier = WIDEN;
5319 else
5320 modifier = NONE;
5321 else if (multiple_p (nunits_out, nunits_in))
5322 modifier = NARROW_DST;
5323 else
5324 {
5325 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5326 modifier = WIDEN;
5327 }
5328
5329 /* Multiple types in SLP are handled by creating the appropriate number of
5330 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5331 case of SLP. */
5332 if (slp_node)
5333 ncopies = 1;
5334 else if (modifier == NARROW_DST)
5335 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5336 else
5337 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5338
5339 /* Sanity check: make sure that at least one copy of the vectorized stmt
5340 needs to be generated. */
5341 gcc_assert (ncopies >= 1);
5342
5343 bool found_mode = false;
5344 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5345 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5346 opt_scalar_mode rhs_mode_iter;
5347
5348 /* Supportable by target? */
5349 switch (modifier)
5350 {
5351 case NONE:
5352 if (code != FIX_TRUNC_EXPR
5353 && code != FLOAT_EXPR
5354 && !CONVERT_EXPR_CODE_P (code))
5355 return false;
5356 gcc_assert (code.is_tree_code ());
5357 if (supportable_convert_operation ((tree_code) code, vectype_out,
5358 vectype_in, &tc1))
5359 {
5360 code1 = tc1;
5361 break;
5362 }
5363
5364 /* For conversions between float and integer types try whether
5365 we can use intermediate signed integer types to support the
5366 conversion. */
5367 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5368 && (code == FLOAT_EXPR ||
5369 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5370 {
5371 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5372 bool float_expr_p = code == FLOAT_EXPR;
5373 unsigned short target_size;
5374 scalar_mode intermediate_mode;
5375 if (demotion)
5376 {
5377 intermediate_mode = lhs_mode;
5378 target_size = GET_MODE_SIZE (rhs_mode);
5379 }
5380 else
5381 {
5382 target_size = GET_MODE_SIZE (lhs_mode);
5383 if (!int_mode_for_size
5384 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5385 goto unsupported;
5386 }
5387 code1 = float_expr_p ? code : NOP_EXPR;
5388 codecvt1 = float_expr_p ? NOP_EXPR : code;
5389 opt_scalar_mode mode_iter;
5390 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5391 {
5392 intermediate_mode = mode_iter.require ();
5393
5394 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5395 break;
5396
5397 scalar_mode cvt_mode;
5398 if (!int_mode_for_size
5399 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5400 break;
5401
5402 cvt_type = build_nonstandard_integer_type
5403 (GET_MODE_BITSIZE (cvt_mode), 0);
5404
5405 /* Check if the intermediate type can hold OP0's range.
5406 When converting from float to integer this is not necessary
5407 because values that do not fit the (smaller) target type are
5408 unspecified anyway. */
5409 if (demotion && float_expr_p)
5410 {
5411 wide_int op_min_value, op_max_value;
5412 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5413 break;
5414
5415 if (cvt_type == NULL_TREE
5416 || (wi::min_precision (op_max_value, SIGNED)
5417 > TYPE_PRECISION (cvt_type))
5418 || (wi::min_precision (op_min_value, SIGNED)
5419 > TYPE_PRECISION (cvt_type)))
5420 continue;
5421 }
5422
5423 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5424 /* This should only happen for SLP, as long as the loop vectorizer
5425 only supports same-sized vectors. */
5426 if (cvt_type == NULL_TREE
5427 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5428 || !supportable_convert_operation ((tree_code) code1,
5429 vectype_out,
5430 cvt_type, &tc1)
5431 || !supportable_convert_operation ((tree_code) codecvt1,
5432 cvt_type,
5433 vectype_in, &tc2))
5434 continue;
5435
5436 found_mode = true;
5437 break;
5438 }
5439
5440 if (found_mode)
5441 {
5442 multi_step_cvt++;
5443 interm_types.safe_push (cvt_type);
5444 cvt_type = NULL_TREE;
5445 code1 = tc1;
5446 codecvt1 = tc2;
5447 break;
5448 }
5449 }
5450 /* FALLTHRU */
5451 unsupported:
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5454 "conversion not supported by target.\n");
5455 return false;
5456
5457 case WIDEN:
5458 if (known_eq (nunits_in, nunits_out))
5459 {
5460 if (!(code.is_tree_code ()
5461 && supportable_half_widening_operation ((tree_code) code,
5462 vectype_out, vectype_in,
5463 &tc1)))
5464 goto unsupported;
5465 code1 = tc1;
5466 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5467 break;
5468 }
5469 if (supportable_widening_operation (vinfo, code, stmt_info,
5470 vectype_out, vectype_in, &code1,
5471 &code2, &multi_step_cvt,
5472 &interm_types))
5473 {
5474 /* Binary widening operation can only be supported directly by the
5475 architecture. */
5476 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5477 break;
5478 }
5479
5480 if (code != FLOAT_EXPR
5481 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5482 goto unsupported;
5483
5484 fltsz = GET_MODE_SIZE (lhs_mode);
5485 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5486 {
5487 rhs_mode = rhs_mode_iter.require ();
5488 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5489 break;
5490
5491 cvt_type
5492 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5493 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5494 if (cvt_type == NULL_TREE)
5495 goto unsupported;
5496
5497 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5498 {
5499 tc1 = ERROR_MARK;
5500 gcc_assert (code.is_tree_code ());
5501 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5502 cvt_type, &tc1))
5503 goto unsupported;
5504 codecvt1 = tc1;
5505 }
5506 else if (!supportable_widening_operation (vinfo, code,
5507 stmt_info, vectype_out,
5508 cvt_type, &codecvt1,
5509 &codecvt2, &multi_step_cvt,
5510 &interm_types))
5511 continue;
5512 else
5513 gcc_assert (multi_step_cvt == 0);
5514
5515 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5516 cvt_type,
5517 vectype_in, &code1,
5518 &code2, &multi_step_cvt,
5519 &interm_types))
5520 {
5521 found_mode = true;
5522 break;
5523 }
5524 }
5525
5526 if (!found_mode)
5527 goto unsupported;
5528
5529 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5530 codecvt2 = ERROR_MARK;
5531 else
5532 {
5533 multi_step_cvt++;
5534 interm_types.safe_push (cvt_type);
5535 cvt_type = NULL_TREE;
5536 }
5537 break;
5538
5539 case NARROW_DST:
5540 gcc_assert (op_type == unary_op);
5541 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5542 &code1, &multi_step_cvt,
5543 &interm_types))
5544 break;
5545
5546 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5547 goto unsupported;
5548
5549 if (code == FIX_TRUNC_EXPR)
5550 {
5551 cvt_type
5552 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5553 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5554 if (cvt_type == NULL_TREE)
5555 goto unsupported;
5556 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5557 &tc1))
5558 codecvt1 = tc1;
5559 else
5560 goto unsupported;
5561 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5562 &code1, &multi_step_cvt,
5563 &interm_types))
5564 break;
5565 }
5566 /* If op0 can be represented by a lower-precision integer,
5567 truncate it to cvt_type and then do the FLOAT_EXPR. */
5568 else if (code == FLOAT_EXPR)
5569 {
5570 wide_int op_min_value, op_max_value;
5571 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5572 goto unsupported;
5573
5574 cvt_type
5575 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5576 if (cvt_type == NULL_TREE
5577 || (wi::min_precision (op_max_value, SIGNED)
5578 > TYPE_PRECISION (cvt_type))
5579 || (wi::min_precision (op_min_value, SIGNED)
5580 > TYPE_PRECISION (cvt_type)))
5581 goto unsupported;
5582
5583 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5584 if (cvt_type == NULL_TREE)
5585 goto unsupported;
5586 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5587 &code1, &multi_step_cvt,
5588 &interm_types))
5589 goto unsupported;
5590 if (supportable_convert_operation ((tree_code) code, vectype_out,
5591 cvt_type, &tc1))
5592 {
5593 codecvt1 = tc1;
5594 modifier = NARROW_SRC;
5595 break;
5596 }
5597 }
5598
5599 goto unsupported;
5600
5601 default:
5602 gcc_unreachable ();
5603 }
5604
5605 if (!vec_stmt) /* transformation not required. */
5606 {
5607 if (slp_node
5608 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5609 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5610 {
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613 "incompatible vector types for invariants\n");
5614 return false;
5615 }
5616 DUMP_VECT_SCOPE ("vectorizable_conversion");
5617 if (modifier == NONE)
5618 {
5619 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5620 vect_model_simple_cost (vinfo, stmt_info,
5621 ncopies * (1 + multi_step_cvt),
5622 dt, ndts, slp_node, cost_vec);
5623 }
5624 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5625 {
5626 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5627 /* The final packing step produces one vector result per copy. */
5628 unsigned int nvectors
5629 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5630 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5631 multi_step_cvt, cost_vec,
5632 widen_arith);
5633 }
5634 else
5635 {
5636 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5637 /* The initial unpacking step produces two vector results
5638 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5639 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5640 unsigned int nvectors
5641 = (slp_node
5642 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5643 : ncopies * 2);
5644 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5645 multi_step_cvt, cost_vec,
5646 widen_arith);
5647 }
5648 interm_types.release ();
5649 return true;
5650 }
5651
5652 /* Transform. */
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_NOTE, vect_location,
5655 "transform conversion. ncopies = %d.\n", ncopies);
5656
5657 if (op_type == binary_op)
5658 {
5659 if (CONSTANT_CLASS_P (op0))
5660 op0 = fold_convert (TREE_TYPE (op1), op0);
5661 else if (CONSTANT_CLASS_P (op1))
5662 op1 = fold_convert (TREE_TYPE (op0), op1);
5663 }
5664
5665 /* In case of multi-step conversion we first generate conversion operations
5666 to the intermediate types, and then from those types to the final one.
5667 We create vector destinations for the intermediate types (TYPES) received
5668 from supportable_*_operation, and store them in the correct order
5669 for future use in vect_create_vectorized_*_stmts (). */
5670 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5671 bool widen_or_narrow_float_p
5672 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5673 vec_dest = vect_create_destination_var (scalar_dest,
5674 widen_or_narrow_float_p
5675 ? cvt_type : vectype_out);
5676 vec_dsts.quick_push (vec_dest);
5677
5678 if (multi_step_cvt)
5679 {
5680 for (i = interm_types.length () - 1;
5681 interm_types.iterate (i, &intermediate_type); i--)
5682 {
5683 vec_dest = vect_create_destination_var (scalar_dest,
5684 intermediate_type);
5685 vec_dsts.quick_push (vec_dest);
5686 }
5687 }
5688
5689 if (cvt_type)
5690 vec_dest = vect_create_destination_var (scalar_dest,
5691 widen_or_narrow_float_p
5692 ? vectype_out : cvt_type);
5693
5694 int ninputs = 1;
5695 if (!slp_node)
5696 {
5697 if (modifier == WIDEN)
5698 ;
5699 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5700 {
5701 if (multi_step_cvt)
5702 ninputs = vect_pow2 (multi_step_cvt);
5703 ninputs *= 2;
5704 }
5705 }
5706
5707 switch (modifier)
5708 {
5709 case NONE:
5710 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5711 op0, vectype_in, &vec_oprnds0);
5712 /* With multi_step_cvt, vec_dest currently holds the intermediate-type destination. */
5713 if (multi_step_cvt)
5714 {
5715 cvt_op = vec_dest;
5716 vec_dest = vec_dsts[0];
5717 }
5718
5719 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5720 {
5721 /* Arguments are ready, create the new vector stmt. */
5722 gimple* new_stmt;
5723 if (multi_step_cvt)
5724 {
5725 gcc_assert (multi_step_cvt == 1);
5726 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5727 new_temp = make_ssa_name (cvt_op, new_stmt);
5728 gimple_assign_set_lhs (new_stmt, new_temp);
5729 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5730 vop0 = new_temp;
5731 }
5732 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5733 new_temp = make_ssa_name (vec_dest, new_stmt);
5734 gimple_set_lhs (new_stmt, new_temp);
5735 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5736
5737 if (slp_node)
5738 slp_node->push_vec_def (new_stmt);
5739 else
5740 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5741 }
5742 break;
5743
5744 case WIDEN:
5745 /* In case the vectorization factor (VF) is bigger than the number
5746 of elements that we can fit in a vectype (nunits), we have to
5747 generate more than one vector stmt, i.e. we need to "unroll" the
5748 vector stmt by a factor of VF/nunits. */
5749 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5750 op0, vectype_in, &vec_oprnds0,
5751 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5752 vectype_in, &vec_oprnds1);
5753 if (code == WIDEN_LSHIFT_EXPR)
5754 {
5755 int oprnds_size = vec_oprnds0.length ();
5756 vec_oprnds1.create (oprnds_size);
5757 for (i = 0; i < oprnds_size; ++i)
5758 vec_oprnds1.quick_push (op1);
5759 }
5760 /* Arguments are ready. Create the new vector stmts. */
5761 for (i = multi_step_cvt; i >= 0; i--)
5762 {
5763 tree this_dest = vec_dsts[i];
5764 code_helper c1 = code1, c2 = code2;
5765 if (i == 0 && codecvt2 != ERROR_MARK)
5766 {
5767 c1 = codecvt1;
5768 c2 = codecvt2;
5769 }
5770 if (known_eq (nunits_out, nunits_in))
5771 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5772 stmt_info, this_dest, gsi, c1,
5773 op_type);
5774 else
5775 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5776 &vec_oprnds1, stmt_info,
5777 this_dest, gsi,
5778 c1, c2, op_type);
5779 }
5780
5781 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5782 {
5783 gimple *new_stmt;
5784 if (cvt_type)
5785 {
5786 new_temp = make_ssa_name (vec_dest);
5787 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5788 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5789 }
5790 else
5791 new_stmt = SSA_NAME_DEF_STMT (vop0);
5792
5793 if (slp_node)
5794 slp_node->push_vec_def (new_stmt);
5795 else
5796 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5797 }
5798 break;
5799
5800 case NARROW_SRC:
5801 case NARROW_DST:
5802 /* In case the vectorization factor (VF) is bigger than the number
5803 of elements that we can fit in a vectype (nunits), we have to
5804 generate more than one vector stmt, i.e. we need to "unroll" the
5805 vector stmt by a factor of VF/nunits. */
5806 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5807 op0, vectype_in, &vec_oprnds0);
5808 /* Arguments are ready. Create the new vector stmts. */
5809 if (cvt_type && modifier == NARROW_DST)
5810 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5811 {
5812 new_temp = make_ssa_name (vec_dest);
5813 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5814 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5815 vec_oprnds0[i] = new_temp;
5816 }
5817
5818 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5819 multi_step_cvt,
5820 stmt_info, vec_dsts, gsi,
5821 slp_node, code1,
5822 modifier == NARROW_SRC);
5823 /* After demoting op0 to cvt_type, convert it to dest. */
5824 if (cvt_type && code == FLOAT_EXPR)
5825 {
5826 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5827 {
5828 /* Arguments are ready, create the new vector stmt. */
5829 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5830 gimple *new_stmt
5831 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5832 new_temp = make_ssa_name (vec_dest, new_stmt);
5833 gimple_set_lhs (new_stmt, new_temp);
5834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5835
5836 /* This is the last step of the conversion sequence. Store the
5837 vectors in SLP_NODE or in vector info of the scalar statement
5838 (or in STMT_VINFO_RELATED_STMT chain). */
5839 if (slp_node)
5840 slp_node->push_vec_def (new_stmt);
5841 else
5842 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5843 }
5844 }
5845 break;
5846 }
5847 if (!slp_node)
5848 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5849
5850 vec_oprnds0.release ();
5851 vec_oprnds1.release ();
5852 interm_types.release ();
5853
5854 return true;
5855 }
5856
5857 /* Return true if we can assume from the scalar form of STMT_INFO that
5858 neither the scalar nor the vector forms will generate code. STMT_INFO
5859 is known not to involve a data reference. */
5860
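/* Illustrative examples of statements for which this returns true,
   assuming int and unsigned int share a mode:

       u_5 = (unsigned int) i_4;
       f_7 = VIEW_CONVERT_EXPR<float>(bits_6);

   Neither changes the bit pattern of its operand, so callers such as
   vectorizable_assignment can skip costing them.  */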
5861 bool
5862 vect_nop_conversion_p (stmt_vec_info stmt_info)
5863 {
5864 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5865 if (!stmt)
5866 return false;
5867
5868 tree lhs = gimple_assign_lhs (stmt);
5869 tree_code code = gimple_assign_rhs_code (stmt);
5870 tree rhs = gimple_assign_rhs1 (stmt);
5871
5872 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5873 return true;
5874
5875 if (CONVERT_EXPR_CODE_P (code))
5876 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5877
5878 return false;
5879 }
5880
5881 /* Function vectorizable_assignment.
5882
5883 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5884 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5885 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5886 Return true if STMT_INFO is vectorizable in this way. */
5887
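/* Illustrative example, assuming int and unsigned int share a mode:

       void
       f (int *a, unsigned int *b, int n)
       {
         for (int i = 0; i < n; i++)
           b[i] = (unsigned int) a[i];
       }

   The sign change does not alter the bit pattern, so each copy becomes a
   single vector assignment, with the operand wrapped in a VIEW_CONVERT_EXPR
   when the two vector types differ only in signedness.  */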
5888 static bool
5889 vectorizable_assignment (vec_info *vinfo,
5890 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5891 gimple **vec_stmt, slp_tree slp_node,
5892 stmt_vector_for_cost *cost_vec)
5893 {
5894 tree vec_dest;
5895 tree scalar_dest;
5896 tree op;
5897 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5898 tree new_temp;
5899 enum vect_def_type dt[1] = {vect_unknown_def_type};
5900 int ndts = 1;
5901 int ncopies;
5902 int i;
5903 vec<tree> vec_oprnds = vNULL;
5904 tree vop;
5905 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5906 enum tree_code code;
5907 tree vectype_in;
5908
5909 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5910 return false;
5911
5912 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5913 && ! vec_stmt)
5914 return false;
5915
5916 /* Is vectorizable assignment? */
5917 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5918 if (!stmt)
5919 return false;
5920
5921 scalar_dest = gimple_assign_lhs (stmt);
5922 if (TREE_CODE (scalar_dest) != SSA_NAME)
5923 return false;
5924
5925 if (STMT_VINFO_DATA_REF (stmt_info))
5926 return false;
5927
5928 code = gimple_assign_rhs_code (stmt);
5929 if (!(gimple_assign_single_p (stmt)
5930 || code == PAREN_EXPR
5931 || CONVERT_EXPR_CODE_P (code)))
5932 return false;
5933
5934 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5935 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5936
5937 /* Multiple types in SLP are handled by creating the appropriate number of
5938 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5939 case of SLP. */
5940 if (slp_node)
5941 ncopies = 1;
5942 else
5943 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5944
5945 gcc_assert (ncopies >= 1);
5946
5947 slp_tree slp_op;
5948 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5949 &dt[0], &vectype_in))
5950 {
5951 if (dump_enabled_p ())
5952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5953 "use not simple.\n");
5954 return false;
5955 }
5956 if (!vectype_in)
5957 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5958
5959 /* We can handle VIEW_CONVERT conversions that do not change the number
5960 of elements or the vector size, and other conversions whose component
5961 types are nop-convertible. */
5962 if (!vectype_in
5963 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5964 || (code == VIEW_CONVERT_EXPR
5965 && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5966 GET_MODE_SIZE (TYPE_MODE (vectype_in))))
5967 || (CONVERT_EXPR_CODE_P (code)
5968 && !tree_nop_conversion_p (TREE_TYPE (vectype),
5969 TREE_TYPE (vectype_in))))
5970 return false;
5971
5972 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5973 {
5974 if (dump_enabled_p ())
5975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5976 "can't convert between boolean and non "
5977 "boolean vectors %T\n", TREE_TYPE (op));
5978
5979 return false;
5980 }
5981
5982 /* We do not handle bit-precision changes. */
5983 if ((CONVERT_EXPR_CODE_P (code)
5984 || code == VIEW_CONVERT_EXPR)
5985 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5986 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5987 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5988 && !type_has_mode_precision_p (TREE_TYPE (op))))
5989 /* But a conversion that does not change the bit-pattern is ok. */
5990 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5991 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5992 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5993 > TYPE_PRECISION (TREE_TYPE (op)))
5994 && TYPE_UNSIGNED (TREE_TYPE (op)))
5995 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5996 == TYPE_PRECISION (TREE_TYPE (op))))))
5997 {
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6000 "type conversion to/from bit-precision "
6001 "unsupported.\n");
6002 return false;
6003 }
6004
6005 if (!vec_stmt) /* transformation not required. */
6006 {
6007 if (slp_node
6008 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6009 {
6010 if (dump_enabled_p ())
6011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6012 "incompatible vector types for invariants\n");
6013 return false;
6014 }
6015 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6016 DUMP_VECT_SCOPE ("vectorizable_assignment");
6017 if (!vect_nop_conversion_p (stmt_info))
6018 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6019 cost_vec);
6020 return true;
6021 }
6022
6023 /* Transform. */
6024 if (dump_enabled_p ())
6025 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6026
6027 /* Handle def. */
6028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6029
6030 /* Handle use. */
6031 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6032
6033 /* Arguments are ready. Create the new vector stmt. */
6034 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6035 {
6036 if (CONVERT_EXPR_CODE_P (code)
6037 || code == VIEW_CONVERT_EXPR)
6038 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6039 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6040 new_temp = make_ssa_name (vec_dest, new_stmt);
6041 gimple_assign_set_lhs (new_stmt, new_temp);
6042 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6043 if (slp_node)
6044 slp_node->push_vec_def (new_stmt);
6045 else
6046 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6047 }
6048 if (!slp_node)
6049 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6050
6051 vec_oprnds.release ();
6052 return true;
6053 }
6054
6055
6056 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6057 either as shift by a scalar or by a vector. */
6058
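/* Illustrative note: some targets only provide a vector-shift-by-scalar
   pattern, which already covers loops such as

       void
       f (int *a, int n)
       {
         for (int i = 0; i < n; i++)
           a[i] <<= 3;
       }

   while others only provide vector-shift-by-vector, so both the
   optab_scalar and the optab_vector forms are tried before giving up.  */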
6059 bool
6060 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6061 {
6062
6063 machine_mode vec_mode;
6064 optab optab;
6065 int icode;
6066 tree vectype;
6067
6068 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6069 if (!vectype)
6070 return false;
6071
6072 optab = optab_for_tree_code (code, vectype, optab_scalar);
6073 if (!optab
6074 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6075 {
6076 optab = optab_for_tree_code (code, vectype, optab_vector);
6077 if (!optab
6078 || (optab_handler (optab, TYPE_MODE (vectype))
6079 == CODE_FOR_nothing))
6080 return false;
6081 }
6082
6083 vec_mode = TYPE_MODE (vectype);
6084 icode = (int) optab_handler (optab, vec_mode);
6085 if (icode == CODE_FOR_nothing)
6086 return false;
6087
6088 return true;
6089 }
6090
6091
6092 /* Function vectorizable_shift.
6093
6094 Check if STMT_INFO performs a shift operation that can be vectorized.
6095 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6096 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6097 Return true if STMT_INFO is vectorizable in this way. */
6098
6099 static bool
6100 vectorizable_shift (vec_info *vinfo,
6101 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6102 gimple **vec_stmt, slp_tree slp_node,
6103 stmt_vector_for_cost *cost_vec)
6104 {
6105 tree vec_dest;
6106 tree scalar_dest;
6107 tree op0, op1 = NULL;
6108 tree vec_oprnd1 = NULL_TREE;
6109 tree vectype;
6110 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6111 enum tree_code code;
6112 machine_mode vec_mode;
6113 tree new_temp;
6114 optab optab;
6115 int icode;
6116 machine_mode optab_op2_mode;
6117 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6118 int ndts = 2;
6119 poly_uint64 nunits_in;
6120 poly_uint64 nunits_out;
6121 tree vectype_out;
6122 tree op1_vectype;
6123 int ncopies;
6124 int i;
6125 vec<tree> vec_oprnds0 = vNULL;
6126 vec<tree> vec_oprnds1 = vNULL;
6127 tree vop0, vop1;
6128 unsigned int k;
6129 bool scalar_shift_arg = true;
6130 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6131 bool incompatible_op1_vectype_p = false;
6132
6133 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6134 return false;
6135
6136 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6137 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6138 && ! vec_stmt)
6139 return false;
6140
6141 /* Is STMT a vectorizable binary/unary operation? */
6142 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6143 if (!stmt)
6144 return false;
6145
6146 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6147 return false;
6148
6149 code = gimple_assign_rhs_code (stmt);
6150
6151 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6152 || code == RROTATE_EXPR))
6153 return false;
6154
6155 scalar_dest = gimple_assign_lhs (stmt);
6156 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6157 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6158 {
6159 if (dump_enabled_p ())
6160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6161 "bit-precision shifts not supported.\n");
6162 return false;
6163 }
6164
6165 slp_tree slp_op0;
6166 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6167 0, &op0, &slp_op0, &dt[0], &vectype))
6168 {
6169 if (dump_enabled_p ())
6170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6171 "use not simple.\n");
6172 return false;
6173 }
6174 /* If op0 is an external or constant def, infer the vector type
6175 from the scalar type. */
6176 if (!vectype)
6177 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6178 if (vec_stmt)
6179 gcc_assert (vectype);
6180 if (!vectype)
6181 {
6182 if (dump_enabled_p ())
6183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6184 "no vectype for scalar type\n");
6185 return false;
6186 }
6187
6188 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6189 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6190 if (maybe_ne (nunits_out, nunits_in))
6191 return false;
6192
6193 stmt_vec_info op1_def_stmt_info;
6194 slp_tree slp_op1;
6195 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6196 &dt[1], &op1_vectype, &op1_def_stmt_info))
6197 {
6198 if (dump_enabled_p ())
6199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6200 "use not simple.\n");
6201 return false;
6202 }
6203
6204 /* Multiple types in SLP are handled by creating the appropriate number of
6205 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6206 case of SLP. */
6207 if (slp_node)
6208 ncopies = 1;
6209 else
6210 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6211
6212 gcc_assert (ncopies >= 1);
6213
6214 /* Determine whether the shift amount is a vector, or scalar. If the
6215 shift/rotate amount is a vector, use the vector/vector shift optabs. */
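  /* For example, in

         void
         f (int *a, int *s, int n)
         {
           for (int i = 0; i < n; i++)
             a[i] >>= s[i];
         }

     the shift amount differs per lane and the vector/vector optab must be
     used, whereas a loop-invariant or constant amount can use the
     vector/scalar form handled below.  */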
6216
6217 if ((dt[1] == vect_internal_def
6218 || dt[1] == vect_induction_def
6219 || dt[1] == vect_nested_cycle)
6220 && !slp_node)
6221 scalar_shift_arg = false;
6222 else if (dt[1] == vect_constant_def
6223 || dt[1] == vect_external_def
6224 || dt[1] == vect_internal_def)
6225 {
6226 /* In SLP we need to check whether the shift count is the same in
6227 all the scalar stmts; in loops, a constant or invariant count
6228 is always a scalar shift. */
6229 if (slp_node)
6230 {
6231 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6232 stmt_vec_info slpstmt_info;
6233
6234 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6235 {
6236 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6237 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6238 scalar_shift_arg = false;
6239 }
6240
6241 /* For internal SLP defs we have to make sure we see scalar stmts
6242 for all vector elements.
6243 ??? For different vectors we could resort to a different
6244 scalar shift operand but code-generation below simply always
6245 takes the first. */
6246 if (dt[1] == vect_internal_def
6247 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6248 stmts.length ()))
6249 scalar_shift_arg = false;
6250 }
6251
6252 /* If the shift amount is computed by a pattern stmt we cannot
6253 use the scalar amount directly thus give up and use a vector
6254 shift. */
6255 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6256 scalar_shift_arg = false;
6257 }
6258 else
6259 {
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6262 "operand mode requires invariant argument.\n");
6263 return false;
6264 }
6265
6266 /* Vector shifted by vector. */
6267 bool was_scalar_shift_arg = scalar_shift_arg;
6268 if (!scalar_shift_arg)
6269 {
6270 optab = optab_for_tree_code (code, vectype, optab_vector);
6271 if (dump_enabled_p ())
6272 dump_printf_loc (MSG_NOTE, vect_location,
6273 "vector/vector shift/rotate found.\n");
6274
6275 if (!op1_vectype)
6276 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6277 slp_op1);
6278 incompatible_op1_vectype_p
6279 = (op1_vectype == NULL_TREE
6280 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6281 TYPE_VECTOR_SUBPARTS (vectype))
6282 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6283 if (incompatible_op1_vectype_p
6284 && (!slp_node
6285 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6286 || slp_op1->refcnt != 1))
6287 {
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "unusable type for last operand in"
6291 " vector/vector shift/rotate.\n");
6292 return false;
6293 }
6294 }
6295 /* See if the machine has a vector shifted by scalar insn and if not
6296 then see if it has a vector shifted by vector insn. */
6297 else
6298 {
6299 optab = optab_for_tree_code (code, vectype, optab_scalar);
6300 if (optab
6301 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6302 {
6303 if (dump_enabled_p ())
6304 dump_printf_loc (MSG_NOTE, vect_location,
6305 "vector/scalar shift/rotate found.\n");
6306 }
6307 else
6308 {
6309 optab = optab_for_tree_code (code, vectype, optab_vector);
6310 if (optab
6311 && (optab_handler (optab, TYPE_MODE (vectype))
6312 != CODE_FOR_nothing))
6313 {
6314 scalar_shift_arg = false;
6315
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "vector/vector shift/rotate found.\n");
6319
6320 if (!op1_vectype)
6321 op1_vectype = get_vectype_for_scalar_type (vinfo,
6322 TREE_TYPE (op1),
6323 slp_op1);
6324
6325 /* Unlike the other binary operators, shifts/rotates have an
6326 rhs of type int rather than of the same type as the lhs,
6327 so make sure the scalar has the right type when we are
6328 dealing with vectors of long long/long/short/char. */
6329 incompatible_op1_vectype_p
6330 = (!op1_vectype
6331 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6332 TREE_TYPE (op1)));
6333 if (incompatible_op1_vectype_p
6334 && dt[1] == vect_internal_def)
6335 {
6336 if (dump_enabled_p ())
6337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6338 "unusable type for last operand in"
6339 " vector/vector shift/rotate.\n");
6340 return false;
6341 }
6342 }
6343 }
6344 }
6345
6346 /* Supportable by target? */
6347 if (!optab)
6348 {
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6351 "no optab.\n");
6352 return false;
6353 }
6354 vec_mode = TYPE_MODE (vectype);
6355 icode = (int) optab_handler (optab, vec_mode);
6356 if (icode == CODE_FOR_nothing)
6357 {
6358 if (dump_enabled_p ())
6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6360 "op not supported by target.\n");
6361 return false;
6362 }
6363 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6364 if (vect_emulated_vector_p (vectype))
6365 return false;
6366
6367 if (!vec_stmt) /* transformation not required. */
6368 {
6369 if (slp_node
6370 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6371 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6372 && (!incompatible_op1_vectype_p
6373 || dt[1] == vect_constant_def)
6374 && !vect_maybe_update_slp_op_vectype
6375 (slp_op1,
6376 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6377 {
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 "incompatible vector types for invariants\n");
6381 return false;
6382 }
6383 /* Now adjust the constant shift amount in place. */
6384 if (slp_node
6385 && incompatible_op1_vectype_p
6386 && dt[1] == vect_constant_def)
6387 {
6388 for (unsigned i = 0;
6389 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6390 {
6391 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6392 = fold_convert (TREE_TYPE (vectype),
6393 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6394 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6395 == INTEGER_CST));
6396 }
6397 }
6398 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6399 DUMP_VECT_SCOPE ("vectorizable_shift");
6400 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6401 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6402 return true;
6403 }
6404
6405 /* Transform. */
6406
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_NOTE, vect_location,
6409 "transform binary/unary operation.\n");
6410
6411 if (incompatible_op1_vectype_p && !slp_node)
6412 {
6413 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6414 op1 = fold_convert (TREE_TYPE (vectype), op1);
6415 if (dt[1] != vect_constant_def)
6416 op1 = vect_init_vector (vinfo, stmt_info, op1,
6417 TREE_TYPE (vectype), NULL);
6418 }
6419
6420 /* Handle def. */
6421 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6422
6423 if (scalar_shift_arg && dt[1] != vect_internal_def)
6424 {
6425 /* Vector shl and shr insn patterns can be defined with scalar
6426 operand 2 (shift operand). In this case, use constant or loop
6427 invariant op1 directly, without extending it to vector mode
6428 first. */
6429 optab_op2_mode = insn_data[icode].operand[2].mode;
6430 if (!VECTOR_MODE_P (optab_op2_mode))
6431 {
6432 if (dump_enabled_p ())
6433 dump_printf_loc (MSG_NOTE, vect_location,
6434 "operand 1 using scalar mode.\n");
6435 vec_oprnd1 = op1;
6436 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6437 vec_oprnds1.quick_push (vec_oprnd1);
6438 /* Store vec_oprnd1 for every vector stmt to be created.
6439 We check during the analysis that all the shift arguments
6440 are the same.
6441 TODO: Allow different constants for different vector
6442 stmts generated for an SLP instance. */
6443 for (k = 0;
6444 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6445 vec_oprnds1.quick_push (vec_oprnd1);
6446 }
6447 }
6448 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6449 {
6450 if (was_scalar_shift_arg)
6451 {
6452 /* If the argument was the same in all lanes create
6453 the correctly typed vector shift amount directly. */
6454 op1 = fold_convert (TREE_TYPE (vectype), op1);
6455 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6456 !loop_vinfo ? gsi : NULL);
6457 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6458 !loop_vinfo ? gsi : NULL);
6459 vec_oprnds1.create (slp_node->vec_stmts_size);
6460 for (k = 0; k < slp_node->vec_stmts_size; k++)
6461 vec_oprnds1.quick_push (vec_oprnd1);
6462 }
6463 else if (dt[1] == vect_constant_def)
6464 /* The constant shift amount has been adjusted in place. */
6465 ;
6466 else
6467 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6468 }
6469
6470 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6471 (a special case for certain kinds of vector shifts); otherwise,
6472 operand 1 should be of a vector type (the usual case). */
6473 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6474 op0, &vec_oprnds0,
6475 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6476
6477 /* Arguments are ready. Create the new vector stmt. */
6478 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6479 {
6480 /* For internal defs where we need to use a scalar shift arg
6481 extract the first lane. */
6482 if (scalar_shift_arg && dt[1] == vect_internal_def)
6483 {
6484 vop1 = vec_oprnds1[0];
6485 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6486 gassign *new_stmt
6487 = gimple_build_assign (new_temp,
6488 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6489 vop1,
6490 TYPE_SIZE (TREE_TYPE (new_temp)),
6491 bitsize_zero_node));
6492 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6493 vop1 = new_temp;
6494 }
6495 else
6496 vop1 = vec_oprnds1[i];
6497 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6498 new_temp = make_ssa_name (vec_dest, new_stmt);
6499 gimple_assign_set_lhs (new_stmt, new_temp);
6500 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6501 if (slp_node)
6502 slp_node->push_vec_def (new_stmt);
6503 else
6504 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6505 }
6506
6507 if (!slp_node)
6508 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6509
6510 vec_oprnds0.release ();
6511 vec_oprnds1.release ();
6512
6513 return true;
6514 }
6515
6516 /* Function vectorizable_operation.
6517
6518 Check if STMT_INFO performs a binary, unary or ternary operation that can
6519 be vectorized.
6520 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6521 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6522 Return true if STMT_INFO is vectorizable in this way. */
6523
6524 static bool
6525 vectorizable_operation (vec_info *vinfo,
6526 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6527 gimple **vec_stmt, slp_tree slp_node,
6528 stmt_vector_for_cost *cost_vec)
6529 {
6530 tree vec_dest;
6531 tree scalar_dest;
6532 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6533 tree vectype;
6534 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6535 enum tree_code code, orig_code;
6536 machine_mode vec_mode;
6537 tree new_temp;
6538 int op_type;
6539 optab optab;
6540 bool target_support_p;
6541 enum vect_def_type dt[3]
6542 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6543 int ndts = 3;
6544 poly_uint64 nunits_in;
6545 poly_uint64 nunits_out;
6546 tree vectype_out;
6547 int ncopies, vec_num;
6548 int i;
6549 vec<tree> vec_oprnds0 = vNULL;
6550 vec<tree> vec_oprnds1 = vNULL;
6551 vec<tree> vec_oprnds2 = vNULL;
6552 tree vop0, vop1, vop2;
6553 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6554
6555 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6556 return false;
6557
6558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6559 && ! vec_stmt)
6560 return false;
6561
6562 /* Is STMT a vectorizable binary/unary operation? */
6563 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6564 if (!stmt)
6565 return false;
6566
6567 /* Loads and stores are handled in vectorizable_{load,store}. */
6568 if (STMT_VINFO_DATA_REF (stmt_info))
6569 return false;
6570
6571 orig_code = code = gimple_assign_rhs_code (stmt);
6572
6573 /* Shifts are handled in vectorizable_shift. */
6574 if (code == LSHIFT_EXPR
6575 || code == RSHIFT_EXPR
6576 || code == LROTATE_EXPR
6577 || code == RROTATE_EXPR)
6578 return false;
6579
6580 /* Comparisons are handled in vectorizable_comparison. */
6581 if (TREE_CODE_CLASS (code) == tcc_comparison)
6582 return false;
6583
6584 /* Conditions are handled in vectorizable_condition. */
6585 if (code == COND_EXPR)
6586 return false;
6587
6588 /* For pointer addition and subtraction, we should use the normal
6589 plus and minus for the vector operation. */
6590 if (code == POINTER_PLUS_EXPR)
6591 code = PLUS_EXPR;
6592 if (code == POINTER_DIFF_EXPR)
6593 code = MINUS_EXPR;
6594
6595 /* Support only unary or binary operations. */
6596 op_type = TREE_CODE_LENGTH (code);
6597 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6598 {
6599 if (dump_enabled_p ())
6600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6601 "num. args = %d (not unary/binary/ternary op).\n",
6602 op_type);
6603 return false;
6604 }
6605
6606 scalar_dest = gimple_assign_lhs (stmt);
6607 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6608
6609 /* Most operations cannot handle bit-precision types without extra
6610 truncations. */
6611 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6612 if (!mask_op_p
6613 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6614 /* Exceptions are the bitwise binary operations. */
6615 && code != BIT_IOR_EXPR
6616 && code != BIT_XOR_EXPR
6617 && code != BIT_AND_EXPR)
6618 {
6619 if (dump_enabled_p ())
6620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6621 "bit-precision arithmetic not supported.\n");
6622 return false;
6623 }
6624
6625 slp_tree slp_op0;
6626 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6627 0, &op0, &slp_op0, &dt[0], &vectype))
6628 {
6629 if (dump_enabled_p ())
6630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6631 "use not simple.\n");
6632 return false;
6633 }
6634 bool is_invariant = (dt[0] == vect_external_def
6635 || dt[0] == vect_constant_def);
6636 /* If op0 is an external or constant def, infer the vector type
6637 from the scalar type. */
6638 if (!vectype)
6639 {
6640 /* For a boolean type we cannot determine the vectype from an
6641 invariant value (we don't know whether it is a vector of
6642 booleans or a vector of integers). We use the output
6643 vectype because operations on booleans don't change the
6644 type. */
6645 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6646 {
6647 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6648 {
6649 if (dump_enabled_p ())
6650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6651 "not supported operation on bool value.\n");
6652 return false;
6653 }
6654 vectype = vectype_out;
6655 }
6656 else
6657 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6658 slp_node);
6659 }
6660 if (vec_stmt)
6661 gcc_assert (vectype);
6662 if (!vectype)
6663 {
6664 if (dump_enabled_p ())
6665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6666 "no vectype for scalar type %T\n",
6667 TREE_TYPE (op0));
6668
6669 return false;
6670 }
6671
6672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6673 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6674 if (maybe_ne (nunits_out, nunits_in)
6675 || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6676 return false;
6677
6678 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6679 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6680 if (op_type == binary_op || op_type == ternary_op)
6681 {
6682 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6683 1, &op1, &slp_op1, &dt[1], &vectype2))
6684 {
6685 if (dump_enabled_p ())
6686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6687 "use not simple.\n");
6688 return false;
6689 }
6690 is_invariant &= (dt[1] == vect_external_def
6691 || dt[1] == vect_constant_def);
6692 if (vectype2
6693 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
6694 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6695 TREE_TYPE (vectype2))))
6696 return false;
6697 }
6698 if (op_type == ternary_op)
6699 {
6700 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6701 2, &op2, &slp_op2, &dt[2], &vectype3))
6702 {
6703 if (dump_enabled_p ())
6704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6705 "use not simple.\n");
6706 return false;
6707 }
6708 is_invariant &= (dt[2] == vect_external_def
6709 || dt[2] == vect_constant_def);
6710 if (vectype3
6711 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
6712 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6713 TREE_TYPE (vectype3))))
6714 return false;
6715 }
6716
6717 /* Multiple types in SLP are handled by creating the appropriate number of
6718 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6719 case of SLP. */
6720 if (slp_node)
6721 {
6722 ncopies = 1;
6723 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6724 }
6725 else
6726 {
6727 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6728 vec_num = 1;
6729 }
6730
6731 gcc_assert (ncopies >= 1);
6732
6733 /* Reject attempts to combine mask types with nonmask types, e.g. if
6734 we have an AND between a (nonmask) boolean loaded from memory and
6735 a (mask) boolean result of a comparison.
6736
6737 TODO: We could easily fix these cases up using pattern statements. */
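  /* A concrete instance of the rejected mixture (illustrative) is an
     expression such as

         tem_1 = b[i] & (a[i] < c[i]);

     where b[] is a _Bool array, so the load produces a data (nonmask)
     boolean, while the comparison produces a mask boolean.  */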
6738 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6739 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6740 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6741 {
6742 if (dump_enabled_p ())
6743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6744 "mixed mask and nonmask vector types\n");
6745 return false;
6746 }
6747
6748 /* Supportable by target? */
6749
6750 vec_mode = TYPE_MODE (vectype);
6751 if (code == MULT_HIGHPART_EXPR)
6752 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6753 else
6754 {
6755 optab = optab_for_tree_code (code, vectype, optab_default);
6756 if (!optab)
6757 {
6758 if (dump_enabled_p ())
6759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6760 "no optab.\n");
6761 return false;
6762 }
6763 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6764 || optab_libfunc (optab, vec_mode));
6765 }
6766
6767 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6768 if (!target_support_p || using_emulated_vectors_p)
6769 {
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6772 "op not supported by target.\n");
6773 /* When vec_mode is not a vector mode and we have verified that the
6774 ops we do not have to lower (like AND) are natively supported,
6775 let those through even when the mode isn't word_mode. For ops
6776 we do have to lower, the lowering code assumes we are dealing
6777 with word_mode. */
6778 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6779 || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6780 || !target_support_p)
6781 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6782 /* Check only during analysis. */
6783 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6784 {
6785 if (dump_enabled_p ())
6786 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6787 return false;
6788 }
6789 if (dump_enabled_p ())
6790 dump_printf_loc (MSG_NOTE, vect_location,
6791 "proceeding using word mode.\n");
6792 using_emulated_vectors_p = true;
6793 }
6794
6795 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6796 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6797 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6798 internal_fn cond_fn = get_conditional_internal_fn (code);
6799 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6800
6801 /* If operating on inactive elements could generate spurious traps,
6802 we need to restrict the operation to active lanes. Note that this
6803 specifically doesn't apply to unhoisted invariants, since they
6804 operate on the same value for every lane.
6805
6806 Similarly, if this operation is part of a reduction, a fully-masked
6807 loop should only change the active lanes of the reduction chain,
6808 keeping the inactive lanes as-is. */
6809 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6810 || reduc_idx >= 0);
6811
6812 if (!vec_stmt) /* transformation not required. */
6813 {
6814 if (loop_vinfo
6815 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6816 && mask_out_inactive)
6817 {
6818 if (cond_len_fn != IFN_LAST
6819 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6820 OPTIMIZE_FOR_SPEED))
6821 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6822 1);
6823 else if (cond_fn != IFN_LAST
6824 && direct_internal_fn_supported_p (cond_fn, vectype,
6825 OPTIMIZE_FOR_SPEED))
6826 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6827 vectype, NULL);
6828 else
6829 {
6830 if (dump_enabled_p ())
6831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6832 "can't use a fully-masked loop because no"
6833 " conditional operation is available.\n");
6834 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6835 }
6836 }
6837
6838 /* Put types on constant and invariant SLP children. */
6839 if (slp_node
6840 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6841 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6842 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6843 {
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6846 "incompatible vector types for invariants\n");
6847 return false;
6848 }
6849
6850 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6851 DUMP_VECT_SCOPE ("vectorizable_operation");
6852 vect_model_simple_cost (vinfo, stmt_info,
6853 ncopies, dt, ndts, slp_node, cost_vec);
6854 if (using_emulated_vectors_p)
6855 {
6856 /* The above vect_model_simple_cost call handles constants
6857 in the prologue and (mis-)costs one of the stmts as
6858 vector stmt. See below for the actual lowering that will
6859 be applied. */
6860 unsigned n
6861 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
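/* The multipliers below approximate the number of extra word_mode
   scalar statements the emulated lowering in the transform phase
   below emits per vector statement; they are heuristics rather than
   exact statement counts.  */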
6862 switch (code)
6863 {
6864 case PLUS_EXPR:
6865 n *= 5;
6866 break;
6867 case MINUS_EXPR:
6868 n *= 6;
6869 break;
6870 case NEGATE_EXPR:
6871 n *= 4;
6872 break;
6873 default:
6874 /* Bit operations do not have extra cost and are accounted
6875 as vector stmt by vect_model_simple_cost. */
6876 n = 0;
6877 break;
6878 }
6879 if (n != 0)
6880 {
6881 /* We also need to materialize two large constants. */
6882 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6883 0, vect_prologue);
6884 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6885 0, vect_body);
6886 }
6887 }
6888 return true;
6889 }
6890
6891 /* Transform. */
6892
6893 if (dump_enabled_p ())
6894 dump_printf_loc (MSG_NOTE, vect_location,
6895 "transform binary/unary operation.\n");
6896
6897 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6898 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6899
6900 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6901 vectors with unsigned elements, but the result is signed. So, we
6902 need to compute the MINUS_EXPR into vectype temporary and
6903 VIEW_CONVERT_EXPR it into the final vectype_out result. */
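/* A sketch of the resulting sequence (names purely illustrative):
     vect_tmp = vect_p - vect_q;                          <-- unsigned VECTYPE
     vect_res = VIEW_CONVERT_EXPR <VECTYPE_OUT> (vect_tmp);
   The VIEW_CONVERT_EXPR is emitted at the end of the main loop below
   when VEC_CVT_DEST is set.  */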
6904 tree vec_cvt_dest = NULL_TREE;
6905 if (orig_code == POINTER_DIFF_EXPR)
6906 {
6907 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6908 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6909 }
6910 /* Handle def. */
6911 else
6912 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6913
6914 /* In case the vectorization factor (VF) is bigger than the number
6915 of elements that we can fit in a vectype (nunits), we have to generate
6916 more than one vector stmt - i.e - we need to "unroll" the
6917 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6918 from one copy of the vector stmt to the next, in the field
6919 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6920 stages to find the correct vector defs to be used when vectorizing
6921 stmts that use the defs of the current stmt. The example below
6922 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6923 we need to create 4 vectorized stmts):
6924
6925 before vectorization:
6926 RELATED_STMT VEC_STMT
6927 S1: x = memref - -
6928 S2: z = x + 1 - -
6929
6930 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6931 there):
6932 RELATED_STMT VEC_STMT
6933 VS1_0: vx0 = memref0 VS1_1 -
6934 VS1_1: vx1 = memref1 VS1_2 -
6935 VS1_2: vx2 = memref2 VS1_3 -
6936 VS1_3: vx3 = memref3 - -
6937 S1: x = load - VS1_0
6938 S2: z = x + 1 - -
6939
6940 step2: vectorize stmt S2 (done here):
6941 To vectorize stmt S2 we first need to find the relevant vector
6942 def for the first operand 'x'. This is, as usual, obtained from
6943 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6944 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6945 relevant vector def 'vx0'. Having found 'vx0' we can generate
6946 the vector stmt VS2_0, and as usual, record it in the
6947 STMT_VINFO_VEC_STMT of stmt S2.
6948 When creating the second copy (VS2_1), we obtain the relevant vector
6949 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6950 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6951 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6952 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6953 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6954 chain of stmts and pointers:
6955 RELATED_STMT VEC_STMT
6956 VS1_0: vx0 = memref0 VS1_1 -
6957 VS1_1: vx1 = memref1 VS1_2 -
6958 VS1_2: vx2 = memref2 VS1_3 -
6959 VS1_3: vx3 = memref3 - -
6960 S1: x = load - VS1_0
6961 VS2_0: vz0 = vx0 + v1 VS2_1 -
6962 VS2_1: vz1 = vx1 + v1 VS2_2 -
6963 VS2_2: vz2 = vx2 + v1 VS2_3 -
6964 VS2_3: vz3 = vx3 + v1 - -
6965 S2: z = x + 1 - VS2_0 */
6966
6967 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6968 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6969 /* Arguments are ready. Create the new vector stmt. */
6970 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6971 {
6972 gimple *new_stmt = NULL;
6973 vop1 = ((op_type == binary_op || op_type == ternary_op)
6974 ? vec_oprnds1[i] : NULL_TREE);
6975 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6976 if (using_emulated_vectors_p
6977 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6978 {
6979 /* Lower the operation. This follows vector lowering. */
6980 unsigned int width = vector_element_bits (vectype);
6981 tree inner_type = TREE_TYPE (vectype);
6982 tree word_type
6983 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6984 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6985 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6986 tree high_bits
6987 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
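/* For example, assuming 8-bit elements in a 32-bit word_mode,
   LOW_BITS is 0x7f7f7f7f and HIGH_BITS is 0x80808080: masking with
   LOW_BITS keeps carries/borrows from crossing element boundaries,
   and the top bit of each element is patched up separately via the
   XORs with SIGNS below.  */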
6988 tree wvop0 = make_ssa_name (word_type);
6989 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6990 build1 (VIEW_CONVERT_EXPR,
6991 word_type, vop0));
6992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6993 tree result_low, signs;
6994 if (code == PLUS_EXPR || code == MINUS_EXPR)
6995 {
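/* PLUS_EXPR/MINUS_EXPR: do the arithmetic on each element with its
   top bit masked off (for PLUS) or forced set (for MINUS) so that no
   carry or borrow can cross an element boundary, then fix up the top
   bit of each element with the final XOR against SIGNS.  */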
6996 tree wvop1 = make_ssa_name (word_type);
6997 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6998 build1 (VIEW_CONVERT_EXPR,
6999 word_type, vop1));
7000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7001 signs = make_ssa_name (word_type);
7002 new_stmt = gimple_build_assign (signs,
7003 BIT_XOR_EXPR, wvop0, wvop1);
7004 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7005 tree b_low = make_ssa_name (word_type);
7006 new_stmt = gimple_build_assign (b_low,
7007 BIT_AND_EXPR, wvop1, low_bits);
7008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7009 tree a_low = make_ssa_name (word_type);
7010 if (code == PLUS_EXPR)
7011 new_stmt = gimple_build_assign (a_low,
7012 BIT_AND_EXPR, wvop0, low_bits);
7013 else
7014 new_stmt = gimple_build_assign (a_low,
7015 BIT_IOR_EXPR, wvop0, high_bits);
7016 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7017 if (code == MINUS_EXPR)
7018 {
7019 new_stmt = gimple_build_assign (NULL_TREE,
7020 BIT_NOT_EXPR, signs);
7021 signs = make_ssa_name (word_type);
7022 gimple_assign_set_lhs (new_stmt, signs);
7023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7024 }
7025 new_stmt = gimple_build_assign (NULL_TREE,
7026 BIT_AND_EXPR, signs, high_bits);
7027 signs = make_ssa_name (word_type);
7028 gimple_assign_set_lhs (new_stmt, signs);
7029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7030 result_low = make_ssa_name (word_type);
7031 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7032 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7033 }
7034 else
7035 {
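/* NEGATE_EXPR: compute -a within each element as
   (HIGH_BITS - (a & LOW_BITS)) ^ (~a & HIGH_BITS), so that no borrow
   can cross an element boundary.  */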
7036 tree a_low = make_ssa_name (word_type);
7037 new_stmt = gimple_build_assign (a_low,
7038 BIT_AND_EXPR, wvop0, low_bits);
7039 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7040 signs = make_ssa_name (word_type);
7041 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7042 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7043 new_stmt = gimple_build_assign (NULL_TREE,
7044 BIT_AND_EXPR, signs, high_bits);
7045 signs = make_ssa_name (word_type);
7046 gimple_assign_set_lhs (new_stmt, signs);
7047 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7048 result_low = make_ssa_name (word_type);
7049 new_stmt = gimple_build_assign (result_low,
7050 MINUS_EXPR, high_bits, a_low);
7051 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7052 }
7053 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7054 signs);
7055 result_low = make_ssa_name (word_type);
7056 gimple_assign_set_lhs (new_stmt, result_low);
7057 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7058 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7059 build1 (VIEW_CONVERT_EXPR,
7060 vectype, result_low));
7061 new_temp = make_ssa_name (vectype);
7062 gimple_assign_set_lhs (new_stmt, new_temp);
7063 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7064 }
7065 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7066 {
7067 tree mask;
7068 if (masked_loop_p)
7069 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7070 vec_num * ncopies, vectype, i);
7071 else
7072 /* Dummy mask. */
7073 mask = build_minus_one_cst (truth_type_for (vectype));
7074 auto_vec<tree> vops (6);
7075 vops.quick_push (mask);
7076 vops.quick_push (vop0);
7077 if (vop1)
7078 vops.quick_push (vop1);
7079 if (vop2)
7080 vops.quick_push (vop2);
7081 if (reduc_idx >= 0)
7082 {
7083 /* Perform the operation on active elements only and take
7084 inactive elements from the reduction chain input. */
7085 gcc_assert (!vop2);
7086 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7087 }
7088 else
7089 {
7090 auto else_value = targetm.preferred_else_value
7091 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7092 vops.quick_push (else_value);
7093 }
7094 if (len_loop_p)
7095 {
7096 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7097 vec_num * ncopies, vectype, i, 1);
7098 signed char biasval
7099 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7100 tree bias = build_int_cst (intQI_type_node, biasval);
7101 vops.quick_push (len);
7102 vops.quick_push (bias);
7103 }
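/* The call built below has the form
     .COND_<OP> (MASK, OP0[, OP1[, OP2]], ELSE)
   or, for length-controlled loops,
     .COND_LEN_<OP> (MASK, OP0[, OP1[, OP2]], ELSE, LEN, BIAS)
   where ELSE is either the reduction chain input or the target's
   preferred else value.  */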
7104 gcall *call
7105 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7106 : cond_len_fn,
7107 vops);
7108 new_temp = make_ssa_name (vec_dest, call);
7109 gimple_call_set_lhs (call, new_temp);
7110 gimple_call_set_nothrow (call, true);
7111 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7112 new_stmt = call;
7113 }
7114 else
7115 {
7116 tree mask = NULL_TREE;
7117 /* When combining two masks, check whether either of them is elsewhere
7118 combined with a loop mask; if so, we can mark the new combined mask
7119 as not needing to be combined with a loop mask again. */
7120 if (masked_loop_p
7121 && code == BIT_AND_EXPR
7122 && VECTOR_BOOLEAN_TYPE_P (vectype))
7123 {
7124 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7125 ncopies}))
7126 {
7127 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7128 vec_num * ncopies, vectype, i);
7129
7130 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7131 vop0, gsi);
7132 }
7133
7134 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7135 ncopies }))
7136 {
7137 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7138 vec_num * ncopies, vectype, i);
7139
7140 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7141 vop1, gsi);
7142 }
7143 }
7144
7145 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7146 new_temp = make_ssa_name (vec_dest, new_stmt);
7147 gimple_assign_set_lhs (new_stmt, new_temp);
7148 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7149 if (using_emulated_vectors_p)
7150 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7151
7152 /* Enter the combined value into the vector cond hash so we don't
7153 AND it with a loop mask again. */
7154 if (mask)
7155 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7156 }
7157
7158 if (vec_cvt_dest)
7159 {
7160 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7161 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7162 new_temp);
7163 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7164 gimple_assign_set_lhs (new_stmt, new_temp);
7165 vect_finish_stmt_generation (vinfo, stmt_info,
7166 new_stmt, gsi);
7167 }
7168
7169 if (slp_node)
7170 slp_node->push_vec_def (new_stmt);
7171 else
7172 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7173 }
7174
7175 if (!slp_node)
7176 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7177
7178 vec_oprnds0.release ();
7179 vec_oprnds1.release ();
7180 vec_oprnds2.release ();
7181
7182 return true;
7183 }
7184
7185 /* A helper function to ensure data reference DR_INFO's base alignment. */
7186
7187 static void
7188 ensure_base_align (dr_vec_info *dr_info)
7189 {
7190 /* Alignment is only analyzed for the first element of a DR group;
7191 use that to determine the base alignment we need to enforce. */
7192 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7193 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7194
7195 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7196
7197 if (dr_info->base_misaligned)
7198 {
7199 tree base_decl = dr_info->base_decl;
7200
7201 // We should only be able to increase the alignment of a base object if
7202 // we know what its new alignment should be at compile time.
7203 unsigned HOST_WIDE_INT align_base_to =
7204 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7205
7206 if (decl_in_symtab_p (base_decl))
7207 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7208 else if (DECL_ALIGN (base_decl) < align_base_to)
7209 {
7210 SET_DECL_ALIGN (base_decl, align_base_to);
7211 DECL_USER_ALIGN (base_decl) = 1;
7212 }
7213 dr_info->base_misaligned = false;
7214 }
7215 }
7216
7217
7218 /* Function get_group_alias_ptr_type.
7219
7220 Return the alias type for the group starting at FIRST_STMT_INFO. */
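/* If the group members do not all agree on their alias set, the
   fallback below is ptr_type_node, whose alias set conflicts with
   everything the group members could reference.  */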
7221
7222 static tree
7223 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7224 {
7225 struct data_reference *first_dr, *next_dr;
7226
7227 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7228 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7229 while (next_stmt_info)
7230 {
7231 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7232 if (get_alias_set (DR_REF (first_dr))
7233 != get_alias_set (DR_REF (next_dr)))
7234 {
7235 if (dump_enabled_p ())
7236 dump_printf_loc (MSG_NOTE, vect_location,
7237 "conflicting alias set types.\n");
7238 return ptr_type_node;
7239 }
7240 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7241 }
7242 return reference_alias_ptr_type (DR_REF (first_dr));
7243 }
7244
7245
7246 /* Function scan_operand_equal_p.
7247
7248 Helper function for check_scan_store. Compare two references
7249 with .GOMP_SIMD_LANE bases. */
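/* Both references access "omp simd array" variables whose index comes
   from a .GOMP_SIMD_LANE call, e.g. the D.2043[_25] accesses in the
   examples in check_scan_store below.  The index may additionally be
   scaled by a constant step and/or widened, so a constant MULT_EXPR
   factor and integral conversions are looked through before
   comparing.  */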
7250
7251 static bool
7252 scan_operand_equal_p (tree ref1, tree ref2)
7253 {
7254 tree ref[2] = { ref1, ref2 };
7255 poly_int64 bitsize[2], bitpos[2];
7256 tree offset[2], base[2];
7257 for (int i = 0; i < 2; ++i)
7258 {
7259 machine_mode mode;
7260 int unsignedp, reversep, volatilep = 0;
7261 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7262 &offset[i], &mode, &unsignedp,
7263 &reversep, &volatilep);
7264 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7265 return false;
7266 if (TREE_CODE (base[i]) == MEM_REF
7267 && offset[i] == NULL_TREE
7268 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7269 {
7270 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7271 if (is_gimple_assign (def_stmt)
7272 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7273 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7274 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7275 {
7276 if (maybe_ne (mem_ref_offset (base[i]), 0))
7277 return false;
7278 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7279 offset[i] = gimple_assign_rhs2 (def_stmt);
7280 }
7281 }
7282 }
7283
7284 if (!operand_equal_p (base[0], base[1], 0))
7285 return false;
7286 if (maybe_ne (bitsize[0], bitsize[1]))
7287 return false;
7288 if (offset[0] != offset[1])
7289 {
7290 if (!offset[0] || !offset[1])
7291 return false;
7292 if (!operand_equal_p (offset[0], offset[1], 0))
7293 {
7294 tree step[2];
7295 for (int i = 0; i < 2; ++i)
7296 {
7297 step[i] = integer_one_node;
7298 if (TREE_CODE (offset[i]) == SSA_NAME)
7299 {
7300 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7301 if (is_gimple_assign (def_stmt)
7302 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7303 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7304 == INTEGER_CST))
7305 {
7306 step[i] = gimple_assign_rhs2 (def_stmt);
7307 offset[i] = gimple_assign_rhs1 (def_stmt);
7308 }
7309 }
7310 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7311 {
7312 step[i] = TREE_OPERAND (offset[i], 1);
7313 offset[i] = TREE_OPERAND (offset[i], 0);
7314 }
7315 tree rhs1 = NULL_TREE;
7316 if (TREE_CODE (offset[i]) == SSA_NAME)
7317 {
7318 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7319 if (gimple_assign_cast_p (def_stmt))
7320 rhs1 = gimple_assign_rhs1 (def_stmt);
7321 }
7322 else if (CONVERT_EXPR_P (offset[i]))
7323 rhs1 = TREE_OPERAND (offset[i], 0);
7324 if (rhs1
7325 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7326 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7327 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7328 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7329 offset[i] = rhs1;
7330 }
7331 if (!operand_equal_p (offset[0], offset[1], 0)
7332 || !operand_equal_p (step[0], step[1], 0))
7333 return false;
7334 }
7335 }
7336 return true;
7337 }
7338
7339
7340 enum scan_store_kind {
7341 /* Normal permutation. */
7342 scan_store_kind_perm,
7343
7344 /* Whole vector left shift permutation with zero init. */
7345 scan_store_kind_lshift_zero,
7346
7347 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7348 scan_store_kind_lshift_cond
7349 };
7350
7351 /* Function scan_store_can_perm_p.
7352
7353 Verify if we can perform the needed permutations or whole vector shifts.
7354 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7355 If USE_WHOLE_VECTOR is nonnull, record in it which scan_store_kind
7356 operation to perform at each step. */
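/* For example, for an 8-element vector the permutations tried are
     i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }  (broadcast of the last element)
   matching the VEC_PERM_EXPRs in the scan examples in
   check_scan_store.  */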
7357
7358 static int
7359 scan_store_can_perm_p (tree vectype, tree init,
7360 vec<enum scan_store_kind> *use_whole_vector = NULL)
7361 {
7362 enum machine_mode vec_mode = TYPE_MODE (vectype);
7363 unsigned HOST_WIDE_INT nunits;
7364 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7365 return -1;
7366 int units_log2 = exact_log2 (nunits);
7367 if (units_log2 <= 0)
7368 return -1;
7369
7370 int i;
7371 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7372 for (i = 0; i <= units_log2; ++i)
7373 {
7374 unsigned HOST_WIDE_INT j, k;
7375 enum scan_store_kind kind = scan_store_kind_perm;
7376 vec_perm_builder sel (nunits, nunits, 1);
7377 sel.quick_grow (nunits);
7378 if (i == units_log2)
7379 {
7380 for (j = 0; j < nunits; ++j)
7381 sel[j] = nunits - 1;
7382 }
7383 else
7384 {
7385 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7386 sel[j] = j;
7387 for (k = 0; j < nunits; ++j, ++k)
7388 sel[j] = nunits + k;
7389 }
7390 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7391 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7392 {
7393 if (i == units_log2)
7394 return -1;
7395
7396 if (whole_vector_shift_kind == scan_store_kind_perm)
7397 {
7398 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7399 return -1;
7400 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7401 /* Whole vector shifts shift in zeros, so if init is an all-zero
7402 constant, there is no need to do anything further. */
7403 if ((TREE_CODE (init) != INTEGER_CST
7404 && TREE_CODE (init) != REAL_CST)
7405 || !initializer_zerop (init))
7406 {
7407 tree masktype = truth_type_for (vectype);
7408 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7409 return -1;
7410 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7411 }
7412 }
7413 kind = whole_vector_shift_kind;
7414 }
7415 if (use_whole_vector)
7416 {
7417 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7418 use_whole_vector->safe_grow_cleared (i, true);
7419 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7420 use_whole_vector->safe_push (kind);
7421 }
7422 }
7423
7424 return units_log2;
7425 }
7426
7427
7428 /* Function check_scan_store.
7429
7430 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7431
7432 static bool
7433 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7434 enum vect_def_type rhs_dt, bool slp, tree mask,
7435 vect_memory_access_type memory_access_type)
7436 {
7437 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7438 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7439 tree ref_type;
7440
7441 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7442 if (slp
7443 || mask
7444 || memory_access_type != VMAT_CONTIGUOUS
7445 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7446 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7447 || loop_vinfo == NULL
7448 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7449 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7450 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7451 || !integer_zerop (DR_INIT (dr_info->dr))
7452 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7453 || !alias_sets_conflict_p (get_alias_set (vectype),
7454 get_alias_set (TREE_TYPE (ref_type))))
7455 {
7456 if (dump_enabled_p ())
7457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7458 "unsupported OpenMP scan store.\n");
7459 return false;
7460 }
7461
7462 /* We need to pattern match code built by OpenMP lowering and simplified
7463 by subsequent optimizations into something we can handle.
7464 #pragma omp simd reduction(inscan,+:r)
7465 for (...)
7466 {
7467 r += something ();
7468 #pragma omp scan inclusive (r)
7469 use (r);
7470 }
7471 shall have body with:
7472 // Initialization for input phase, store the reduction initializer:
7473 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7474 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7475 D.2042[_21] = 0;
7476 // Actual input phase:
7477 ...
7478 r.0_5 = D.2042[_20];
7479 _6 = _4 + r.0_5;
7480 D.2042[_20] = _6;
7481 // Initialization for scan phase:
7482 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7483 _26 = D.2043[_25];
7484 _27 = D.2042[_25];
7485 _28 = _26 + _27;
7486 D.2043[_25] = _28;
7487 D.2042[_25] = _28;
7488 // Actual scan phase:
7489 ...
7490 r.1_8 = D.2042[_20];
7491 ...
7492 The "omp simd array" variable D.2042 holds the privatized copy used
7493 inside of the loop and D.2043 is another one that holds copies of
7494 the current original list item. The separate GOMP_SIMD_LANE ifn
7495 kinds are there in order to allow optimizing the initializer store
7496 and combiner sequence, e.g. if it is originally some C++ish user
7497 defined reduction, while still allowing the vectorizer to pattern
7498 recognize it and turn it into the appropriate vectorized scan.
7499
7500 For exclusive scan, this is slightly different:
7501 #pragma omp simd reduction(inscan,+:r)
7502 for (...)
7503 {
7504 use (r);
7505 #pragma omp scan exclusive (r)
7506 r += something ();
7507 }
7508 shall have body with:
7509 // Initialization for input phase, store the reduction initializer:
7510 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7511 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7512 D.2042[_21] = 0;
7513 // Actual input phase:
7514 ...
7515 r.0_5 = D.2042[_20];
7516 _6 = _4 + r.0_5;
7517 D.2042[_20] = _6;
7518 // Initialization for scan phase:
7519 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7520 _26 = D.2043[_25];
7521 D.2044[_25] = _26;
7522 _27 = D.2042[_25];
7523 _28 = _26 + _27;
7524 D.2043[_25] = _28;
7525 // Actual scan phase:
7526 ...
7527 r.1_8 = D.2044[_20];
7528 ... */
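/* In the checks below STMT_VINFO_SIMD_LANE_ACCESS_P corresponds to the
   second .GOMP_SIMD_LANE argument plus one, so 2 identifies the
   initializer store (D.2042[_21] = 0), 3 the inclusive-scan phase
   accesses and 4 the exclusive-scan phase accesses shown above.  */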
7529
7530 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7531 {
7532 /* Match the D.2042[_21] = 0; store above. Just require that
7533 it is a constant or external definition store. */
7534 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7535 {
7536 fail_init:
7537 if (dump_enabled_p ())
7538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7539 "unsupported OpenMP scan initializer store.\n");
7540 return false;
7541 }
7542
7543 if (! loop_vinfo->scan_map)
7544 loop_vinfo->scan_map = new hash_map<tree, tree>;
7545 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7546 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7547 if (cached)
7548 goto fail_init;
7549 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7550
7551 /* These stores can be vectorized normally. */
7552 return true;
7553 }
7554
7555 if (rhs_dt != vect_internal_def)
7556 {
7557 fail:
7558 if (dump_enabled_p ())
7559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7560 "unsupported OpenMP scan combiner pattern.\n");
7561 return false;
7562 }
7563
7564 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7565 tree rhs = gimple_assign_rhs1 (stmt);
7566 if (TREE_CODE (rhs) != SSA_NAME)
7567 goto fail;
7568
7569 gimple *other_store_stmt = NULL;
7570 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7571 bool inscan_var_store
7572 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7573
7574 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7575 {
7576 if (!inscan_var_store)
7577 {
7578 use_operand_p use_p;
7579 imm_use_iterator iter;
7580 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7581 {
7582 gimple *use_stmt = USE_STMT (use_p);
7583 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7584 continue;
7585 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7586 || !is_gimple_assign (use_stmt)
7587 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7588 || other_store_stmt
7589 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7590 goto fail;
7591 other_store_stmt = use_stmt;
7592 }
7593 if (other_store_stmt == NULL)
7594 goto fail;
7595 rhs = gimple_assign_lhs (other_store_stmt);
7596 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7597 goto fail;
7598 }
7599 }
7600 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7601 {
7602 use_operand_p use_p;
7603 imm_use_iterator iter;
7604 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7605 {
7606 gimple *use_stmt = USE_STMT (use_p);
7607 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7608 continue;
7609 if (other_store_stmt)
7610 goto fail;
7611 other_store_stmt = use_stmt;
7612 }
7613 }
7614 else
7615 goto fail;
7616
7617 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7618 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7619 || !is_gimple_assign (def_stmt)
7620 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7621 goto fail;
7622
7623 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7624 /* For pointer addition, we should use the normal plus for the vector
7625 operation. */
7626 switch (code)
7627 {
7628 case POINTER_PLUS_EXPR:
7629 code = PLUS_EXPR;
7630 break;
7631 case MULT_HIGHPART_EXPR:
7632 goto fail;
7633 default:
7634 break;
7635 }
7636 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7637 goto fail;
7638
7639 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7640 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7641 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7642 goto fail;
7643
7644 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7645 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7646 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7647 || !gimple_assign_load_p (load1_stmt)
7648 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7649 || !gimple_assign_load_p (load2_stmt))
7650 goto fail;
7651
7652 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7653 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7654 if (load1_stmt_info == NULL
7655 || load2_stmt_info == NULL
7656 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7657 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7658 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7659 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7660 goto fail;
7661
7662 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7663 {
7664 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7665 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7666 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7667 goto fail;
7668 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7669 tree lrhs;
7670 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7671 lrhs = rhs1;
7672 else
7673 lrhs = rhs2;
7674 use_operand_p use_p;
7675 imm_use_iterator iter;
7676 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7677 {
7678 gimple *use_stmt = USE_STMT (use_p);
7679 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7680 continue;
7681 if (other_store_stmt)
7682 goto fail;
7683 other_store_stmt = use_stmt;
7684 }
7685 }
7686
7687 if (other_store_stmt == NULL)
7688 goto fail;
7689 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7690 || !gimple_store_p (other_store_stmt))
7691 goto fail;
7692
7693 stmt_vec_info other_store_stmt_info
7694 = loop_vinfo->lookup_stmt (other_store_stmt);
7695 if (other_store_stmt_info == NULL
7696 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7697 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7698 goto fail;
7699
7700 gimple *stmt1 = stmt;
7701 gimple *stmt2 = other_store_stmt;
7702 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7703 std::swap (stmt1, stmt2);
7704 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7705 gimple_assign_rhs1 (load2_stmt)))
7706 {
7707 std::swap (rhs1, rhs2);
7708 std::swap (load1_stmt, load2_stmt);
7709 std::swap (load1_stmt_info, load2_stmt_info);
7710 }
7711 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7712 gimple_assign_rhs1 (load1_stmt)))
7713 goto fail;
7714
7715 tree var3 = NULL_TREE;
7716 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7717 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7718 gimple_assign_rhs1 (load2_stmt)))
7719 goto fail;
7720 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7721 {
7722 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7723 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7724 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7725 goto fail;
7726 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7727 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7728 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7729 || lookup_attribute ("omp simd inscan exclusive",
7730 DECL_ATTRIBUTES (var3)))
7731 goto fail;
7732 }
7733
7734 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7735 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7736 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7737 goto fail;
7738
7739 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7740 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7741 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7742 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7743 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7744 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7745 goto fail;
7746
7747 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7748 std::swap (var1, var2);
7749
7750 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7751 {
7752 if (!lookup_attribute ("omp simd inscan exclusive",
7753 DECL_ATTRIBUTES (var1)))
7754 goto fail;
7755 var1 = var3;
7756 }
7757
7758 if (loop_vinfo->scan_map == NULL)
7759 goto fail;
7760 tree *init = loop_vinfo->scan_map->get (var1);
7761 if (init == NULL)
7762 goto fail;
7763
7764 /* The IL is as expected; now check if we can actually vectorize it.
7765 Inclusive scan:
7766 _26 = D.2043[_25];
7767 _27 = D.2042[_25];
7768 _28 = _26 + _27;
7769 D.2043[_25] = _28;
7770 D.2042[_25] = _28;
7771 should be vectorized as (where _40 is the vectorized rhs
7772 from the D.2042[_21] = 0; store):
7773 _30 = MEM <vector(8) int> [(int *)&D.2043];
7774 _31 = MEM <vector(8) int> [(int *)&D.2042];
7775 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7776 _33 = _31 + _32;
7777 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7778 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7779 _35 = _33 + _34;
7780 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7781 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7782 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7783 _37 = _35 + _36;
7784 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7785 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7786 _38 = _30 + _37;
7787 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7788 MEM <vector(8) int> [(int *)&D.2043] = _39;
7789 MEM <vector(8) int> [(int *)&D.2042] = _38;
7790 Exclusive scan:
7791 _26 = D.2043[_25];
7792 D.2044[_25] = _26;
7793 _27 = D.2042[_25];
7794 _28 = _26 + _27;
7795 D.2043[_25] = _28;
7796 should be vectorized as (where _40 is the vectorized rhs
7797 from the D.2042[_21] = 0; store):
7798 _30 = MEM <vector(8) int> [(int *)&D.2043];
7799 _31 = MEM <vector(8) int> [(int *)&D.2042];
7800 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7801 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7802 _34 = _32 + _33;
7803 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7804 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7805 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7806 _36 = _34 + _35;
7807 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7808 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7809 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7810 _38 = _36 + _37;
7811 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7812 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7813 _39 = _30 + _38;
7814 _50 = _31 + _39;
7815 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7816 MEM <vector(8) int> [(int *)&D.2044] = _39;
7817 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7818 enum machine_mode vec_mode = TYPE_MODE (vectype);
7819 optab optab = optab_for_tree_code (code, vectype, optab_default);
7820 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7821 goto fail;
7822
7823 int units_log2 = scan_store_can_perm_p (vectype, *init);
7824 if (units_log2 == -1)
7825 goto fail;
7826
7827 return true;
7828 }
7829
7830
7831 /* Function vectorizable_scan_store.
7832
7833 Helper of vectorizable_store, with arguments like those of vectorizable_store.
7834 Handle only the transformation, checking is done in check_scan_store. */
7835
7836 static bool
7837 vectorizable_scan_store (vec_info *vinfo,
7838 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7839 gimple **vec_stmt, int ncopies)
7840 {
7841 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7842 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7843 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7845
7846 if (dump_enabled_p ())
7847 dump_printf_loc (MSG_NOTE, vect_location,
7848 "transform scan store. ncopies = %d\n", ncopies);
7849
7850 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7851 tree rhs = gimple_assign_rhs1 (stmt);
7852 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7853
7854 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7855 bool inscan_var_store
7856 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7857
7858 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7859 {
7860 use_operand_p use_p;
7861 imm_use_iterator iter;
7862 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7863 {
7864 gimple *use_stmt = USE_STMT (use_p);
7865 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7866 continue;
7867 rhs = gimple_assign_lhs (use_stmt);
7868 break;
7869 }
7870 }
7871
7872 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7873 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7874 if (code == POINTER_PLUS_EXPR)
7875 code = PLUS_EXPR;
7876 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7877 && commutative_tree_code (code));
7878 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7879 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7880 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7881 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7882 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7883 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7884 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7885 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7886 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7887 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7888 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7889
7890 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7891 {
7892 std::swap (rhs1, rhs2);
7893 std::swap (var1, var2);
7894 std::swap (load1_dr_info, load2_dr_info);
7895 }
7896
7897 tree *init = loop_vinfo->scan_map->get (var1);
7898 gcc_assert (init);
7899
7900 unsigned HOST_WIDE_INT nunits;
7901 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7902 gcc_unreachable ();
7903 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7904 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7905 gcc_assert (units_log2 > 0);
7906 auto_vec<tree, 16> perms;
7907 perms.quick_grow (units_log2 + 1);
7908 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7909 for (int i = 0; i <= units_log2; ++i)
7910 {
7911 unsigned HOST_WIDE_INT j, k;
7912 vec_perm_builder sel (nunits, nunits, 1);
7913 sel.quick_grow (nunits);
7914 if (i == units_log2)
7915 for (j = 0; j < nunits; ++j)
7916 sel[j] = nunits - 1;
7917 else
7918 {
7919 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7920 sel[j] = j;
7921 for (k = 0; j < nunits; ++j, ++k)
7922 sel[j] = nunits + k;
7923 }
7924 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7925 if (!use_whole_vector.is_empty ()
7926 && use_whole_vector[i] != scan_store_kind_perm)
7927 {
7928 if (zero_vec == NULL_TREE)
7929 zero_vec = build_zero_cst (vectype);
7930 if (masktype == NULL_TREE
7931 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7932 masktype = truth_type_for (vectype);
7933 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7934 }
7935 else
7936 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7937 }
7938
7939 tree vec_oprnd1 = NULL_TREE;
7940 tree vec_oprnd2 = NULL_TREE;
7941 tree vec_oprnd3 = NULL_TREE;
7942 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7943 tree dataref_offset = build_int_cst (ref_type, 0);
7944 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7945 vectype, VMAT_CONTIGUOUS);
7946 tree ldataref_ptr = NULL_TREE;
7947 tree orig = NULL_TREE;
7948 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7949 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7950 auto_vec<tree> vec_oprnds1;
7951 auto_vec<tree> vec_oprnds2;
7952 auto_vec<tree> vec_oprnds3;
7953 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7954 *init, &vec_oprnds1,
7955 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7956 rhs2, &vec_oprnds3);
7957 for (int j = 0; j < ncopies; j++)
7958 {
7959 vec_oprnd1 = vec_oprnds1[j];
7960 if (ldataref_ptr == NULL)
7961 vec_oprnd2 = vec_oprnds2[j];
7962 vec_oprnd3 = vec_oprnds3[j];
7963 if (j == 0)
7964 orig = vec_oprnd3;
7965 else if (!inscan_var_store)
7966 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7967
7968 if (ldataref_ptr)
7969 {
7970 vec_oprnd2 = make_ssa_name (vectype);
7971 tree data_ref = fold_build2 (MEM_REF, vectype,
7972 unshare_expr (ldataref_ptr),
7973 dataref_offset);
7974 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7975 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7976 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7977 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7978 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7979 }
7980
7981 tree v = vec_oprnd2;
7982 for (int i = 0; i < units_log2; ++i)
7983 {
7984 tree new_temp = make_ssa_name (vectype);
7985 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7986 (zero_vec
7987 && (use_whole_vector[i]
7988 != scan_store_kind_perm))
7989 ? zero_vec : vec_oprnd1, v,
7990 perms[i]);
7991 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7992 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7993 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7994
7995 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7996 {
7997 /* The whole-vector shift shifted in zeros, but if *init
7998 is not initializer_zerop we need to replace those elements
7999 with elements from vec_oprnd1. */
8000 tree_vector_builder vb (masktype, nunits, 1);
8001 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8002 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8003 ? boolean_false_node : boolean_true_node);
8004
8005 tree new_temp2 = make_ssa_name (vectype);
8006 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8007 new_temp, vec_oprnd1);
8008 vect_finish_stmt_generation (vinfo, stmt_info,
8009 g, gsi);
8010 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8011 new_temp = new_temp2;
8012 }
8013
8014 /* For exclusive scan, perform the perms[i] permutation once
8015 more. */
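/* This mirrors the extra _33 = VEC_PERM_EXPR <_40, _32, ...> step in
   the exclusive-scan example in check_scan_store: the input has to be
   shifted by one more element before the log2 addition steps start,
   so perms[0] is applied a second time here.  */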
8016 if (i == 0
8017 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8018 && v == vec_oprnd2)
8019 {
8020 v = new_temp;
8021 --i;
8022 continue;
8023 }
8024
8025 tree new_temp2 = make_ssa_name (vectype);
8026 g = gimple_build_assign (new_temp2, code, v, new_temp);
8027 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8028 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8029
8030 v = new_temp2;
8031 }
8032
8033 tree new_temp = make_ssa_name (vectype);
8034 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8035 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8036 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8037
8038 tree last_perm_arg = new_temp;
8039 /* For exclusive scan, new_temp computed above is the exclusive scan
8040 prefix sum. Turn it into inclusive prefix sum for the broadcast
8041 of the last element into orig. */
8042 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8043 {
8044 last_perm_arg = make_ssa_name (vectype);
8045 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8046 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8047 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8048 }
8049
8050 orig = make_ssa_name (vectype);
8051 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8052 last_perm_arg, perms[units_log2]);
8053 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8054 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8055
8056 if (!inscan_var_store)
8057 {
8058 tree data_ref = fold_build2 (MEM_REF, vectype,
8059 unshare_expr (dataref_ptr),
8060 dataref_offset);
8061 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8062 g = gimple_build_assign (data_ref, new_temp);
8063 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8064 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8065 }
8066 }
8067
8068 if (inscan_var_store)
8069 for (int j = 0; j < ncopies; j++)
8070 {
8071 if (j != 0)
8072 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8073
8074 tree data_ref = fold_build2 (MEM_REF, vectype,
8075 unshare_expr (dataref_ptr),
8076 dataref_offset);
8077 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8078 gimple *g = gimple_build_assign (data_ref, orig);
8079 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8080 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8081 }
8082 return true;
8083 }
8084
8085
8086 /* Function vectorizable_store.
8087
8088 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8089 that can be vectorized.
8090 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8091 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8092 Return true if STMT_INFO is vectorizable in this way. */
8093
8094 static bool
8095 vectorizable_store (vec_info *vinfo,
8096 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8097 gimple **vec_stmt, slp_tree slp_node,
8098 stmt_vector_for_cost *cost_vec)
8099 {
8100 tree data_ref;
8101 tree vec_oprnd = NULL_TREE;
8102 tree elem_type;
8103 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8104 class loop *loop = NULL;
8105 machine_mode vec_mode;
8106 tree dummy;
8107 enum vect_def_type rhs_dt = vect_unknown_def_type;
8108 enum vect_def_type mask_dt = vect_unknown_def_type;
8109 tree dataref_ptr = NULL_TREE;
8110 tree dataref_offset = NULL_TREE;
8111 gimple *ptr_incr = NULL;
8112 int ncopies;
8113 int j;
8114 stmt_vec_info first_stmt_info;
8115 bool grouped_store;
8116 unsigned int group_size, i;
8117 bool slp = (slp_node != NULL);
8118 unsigned int vec_num;
8119 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8120 tree aggr_type;
8121 gather_scatter_info gs_info;
8122 poly_uint64 vf;
8123 vec_load_store_type vls_type;
8124 tree ref_type;
8125
8126 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8127 return false;
8128
8129 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8130 && ! vec_stmt)
8131 return false;
8132
8133 /* Is vectorizable store? */
8134
8135 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8136 slp_tree mask_node = NULL;
8137 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8138 {
8139 tree scalar_dest = gimple_assign_lhs (assign);
8140 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8141 && is_pattern_stmt_p (stmt_info))
8142 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8143 if (TREE_CODE (scalar_dest) != ARRAY_REF
8144 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8145 && TREE_CODE (scalar_dest) != INDIRECT_REF
8146 && TREE_CODE (scalar_dest) != COMPONENT_REF
8147 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8148 && TREE_CODE (scalar_dest) != REALPART_EXPR
8149 && TREE_CODE (scalar_dest) != MEM_REF)
8150 return false;
8151 }
8152 else
8153 {
8154 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8155 if (!call || !gimple_call_internal_p (call))
8156 return false;
8157
8158 internal_fn ifn = gimple_call_internal_fn (call);
8159 if (!internal_store_fn_p (ifn))
8160 return false;
8161
8162 int mask_index = internal_fn_mask_index (ifn);
8163 if (mask_index >= 0 && slp_node)
8164 mask_index = vect_slp_child_index_for_operand
8165 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8166 if (mask_index >= 0
8167 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8168 &mask, &mask_node, &mask_dt,
8169 &mask_vectype))
8170 return false;
8171 }
8172
8173 /* Cannot have hybrid store SLP -- that would mean storing to the
8174 same location twice. */
8175 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8176
8177 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8178 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8179
8180 if (loop_vinfo)
8181 {
8182 loop = LOOP_VINFO_LOOP (loop_vinfo);
8183 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8184 }
8185 else
8186 vf = 1;
8187
8188 /* Multiple types in SLP are handled by creating the appropriate number of
8189 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8190 case of SLP. */
8191 if (slp)
8192 ncopies = 1;
8193 else
8194 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8195
8196 gcc_assert (ncopies >= 1);
8197
8198 /* FORNOW. This restriction should be relaxed. */
8199 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8200 {
8201 if (dump_enabled_p ())
8202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8203 "multiple types in nested loop.\n");
8204 return false;
8205 }
8206
8207 tree op;
8208 slp_tree op_node;
8209 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8210 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8211 return false;
8212
8213 elem_type = TREE_TYPE (vectype);
8214 vec_mode = TYPE_MODE (vectype);
8215
8216 if (!STMT_VINFO_DATA_REF (stmt_info))
8217 return false;
8218
8219 vect_memory_access_type memory_access_type;
8220 enum dr_alignment_support alignment_support_scheme;
8221 int misalignment;
8222 poly_int64 poffset;
8223 internal_fn lanes_ifn;
8224 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8225 ncopies, &memory_access_type, &poffset,
8226 &alignment_support_scheme, &misalignment, &gs_info,
8227 &lanes_ifn))
8228 return false;
8229
8230 if (mask)
8231 {
8232 if (memory_access_type == VMAT_CONTIGUOUS)
8233 {
8234 if (!VECTOR_MODE_P (vec_mode)
8235 || !can_vec_mask_load_store_p (vec_mode,
8236 TYPE_MODE (mask_vectype), false))
8237 return false;
8238 }
8239 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8240 && (memory_access_type != VMAT_GATHER_SCATTER
8241 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8242 {
8243 if (dump_enabled_p ())
8244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8245 "unsupported access type for masked store.\n");
8246 return false;
8247 }
8248 else if (memory_access_type == VMAT_GATHER_SCATTER
8249 && gs_info.ifn == IFN_LAST
8250 && !gs_info.decl)
8251 {
8252 if (dump_enabled_p ())
8253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8254 "unsupported masked emulated scatter.\n");
8255 return false;
8256 }
8257 }
8258 else
8259 {
8260 /* FORNOW. In some cases we can vectorize even if the data type is not
8261 supported (e.g. array initialization with 0). */
8262 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8263 return false;
8264 }
8265
8266 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8267 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8268 && memory_access_type != VMAT_GATHER_SCATTER
8269 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8270 if (grouped_store)
8271 {
8272 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8273 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8274 group_size = DR_GROUP_SIZE (first_stmt_info);
8275 }
8276 else
8277 {
8278 first_stmt_info = stmt_info;
8279 first_dr_info = dr_info;
8280 group_size = vec_num = 1;
8281 }
8282
8283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8284 {
8285 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8286 memory_access_type))
8287 return false;
8288 }
8289
8290 bool costing_p = !vec_stmt;
8291 if (costing_p) /* transformation not required. */
8292 {
8293 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8294
8295 if (loop_vinfo
8296 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8297 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8298 vls_type, group_size,
8299 memory_access_type, &gs_info,
8300 mask);
8301
8302 if (slp_node
8303 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8304 || (mask
8305 && !vect_maybe_update_slp_op_vectype (mask_node,
8306 mask_vectype))))
8307 {
8308 if (dump_enabled_p ())
8309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8310 "incompatible vector types for invariants\n");
8311 return false;
8312 }
8313
8314 if (dump_enabled_p ()
8315 && memory_access_type != VMAT_ELEMENTWISE
8316 && memory_access_type != VMAT_GATHER_SCATTER
8317 && alignment_support_scheme != dr_aligned)
8318 dump_printf_loc (MSG_NOTE, vect_location,
8319 "Vectorizing an unaligned access.\n");
8320
8321 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8322
8323 /* As function vect_transform_stmt shows, for interleaving stores
8324 the whole chain is vectorized when the last store in the chain
8325 is reached; the other stores in the group are skipped. So we
8326 want to cost only the last one here, but since it's not trivial
8327 to get at the last one and costing the first one is equivalent,
8328 use the first one instead. */
8329 if (grouped_store
8330 && !slp
8331 && first_stmt_info != stmt_info)
8332 return true;
8333 }
8334 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8335
8336 /* Transform. */
8337
8338 ensure_base_align (dr_info);
8339
8340 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8341 {
8342 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8343 gcc_assert (!slp);
8344 if (costing_p)
8345 {
8346 unsigned int inside_cost = 0, prologue_cost = 0;
8347 if (vls_type == VLS_STORE_INVARIANT)
8348 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8349 stmt_info, 0, vect_prologue);
8350 vect_get_store_cost (vinfo, stmt_info, ncopies,
8351 alignment_support_scheme, misalignment,
8352 &inside_cost, cost_vec);
8353
8354 if (dump_enabled_p ())
8355 dump_printf_loc (MSG_NOTE, vect_location,
8356 "vect_model_store_cost: inside_cost = %d, "
8357 "prologue_cost = %d .\n",
8358 inside_cost, prologue_cost);
8359
8360 return true;
8361 }
8362 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8363 }
8364
8365 if (grouped_store)
8366 {
8367 /* FORNOW */
8368 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8369
8370 if (slp)
8371 {
8372 grouped_store = false;
8373 /* VEC_NUM is the number of vect stmts to be created for this
8374 group. */
8375 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8376 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8377 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8378 == first_stmt_info);
8379 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8380 op = vect_get_store_rhs (first_stmt_info);
8381 }
8382 else
8383 /* VEC_NUM is the number of vect stmts to be created for this
8384 group. */
8385 vec_num = group_size;
8386
8387 ref_type = get_group_alias_ptr_type (first_stmt_info);
8388 }
8389 else
8390 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8391
8392 if (!costing_p && dump_enabled_p ())
8393 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8394 ncopies);
8395
8396 /* Check if we need to update the prologue cost for an invariant,
8397 and update it accordingly if so. If it's not for an
8398 interleaving store, we can just check vls_type; but if it's
8399 for an interleaving store, we need to check the def_type of
8400 the stored value since the current vls_type is only valid
8401 for first_stmt_info. */
8402 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8403 {
8404 gcc_assert (costing_p);
8405 if (slp)
8406 return;
8407 if (grouped_store)
8408 {
8409 gcc_assert (store_rhs);
8410 enum vect_def_type cdt;
8411 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8412 if (cdt != vect_constant_def && cdt != vect_external_def)
8413 return;
8414 }
8415 else if (vls_type != VLS_STORE_INVARIANT)
8416 return;
8417 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8418 0, vect_prologue);
8419 };
8420
8421 if (memory_access_type == VMAT_ELEMENTWISE
8422 || memory_access_type == VMAT_STRIDED_SLP)
8423 {
8424 unsigned inside_cost = 0, prologue_cost = 0;
8425 gimple_stmt_iterator incr_gsi;
8426 bool insert_after;
8427 gimple *incr;
8428 tree offvar;
8429 tree ivstep;
8430 tree running_off;
8431 tree stride_base, stride_step, alias_off;
8432 tree vec_oprnd = NULL_TREE;
8433 tree dr_offset;
8434 unsigned int g;
8435 /* Checked by get_load_store_type. */
8436 unsigned int const_nunits = nunits.to_constant ();
8437
8438 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8439 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8440
8441 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8442 stride_base
8443 = fold_build_pointer_plus
8444 (DR_BASE_ADDRESS (first_dr_info->dr),
8445 size_binop (PLUS_EXPR,
8446 convert_to_ptrofftype (dr_offset),
8447 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8448 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8449
8450 /* For a store with loop-invariant (but other than power-of-2)
8451 stride (i.e. not a grouped access) like so:
8452
8453 for (i = 0; i < n; i += stride)
8454 array[i] = ...;
8455
8456 we generate a new induction variable and new stores from
8457 the components of the (vectorized) rhs:
8458
8459 for (j = 0; ; j += VF*stride)
8460 vectemp = ...;
8461 tmp1 = vectemp[0];
8462 array[j] = tmp1;
8463 tmp2 = vectemp[1];
8464 array[j + stride] = tmp2;
8465 ...
8466 */
8467
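/* NSTORES is the number of scalar (or sub-vector) stores needed per
   vector of the rhs, LNEL the number of group elements each of them
   covers, and LTYPE/LVECTYPE the types used for the extracted pieces
   and for the vector they are extracted from.  */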
8468 unsigned nstores = const_nunits;
8469 unsigned lnel = 1;
8470 tree ltype = elem_type;
8471 tree lvectype = vectype;
8472 if (slp)
8473 {
8474 if (group_size < const_nunits
8475 && const_nunits % group_size == 0)
8476 {
8477 nstores = const_nunits / group_size;
8478 lnel = group_size;
8479 ltype = build_vector_type (elem_type, group_size);
8480 lvectype = vectype;
8481
8482 /* First check whether the vec_extract optab does not support
8483 extraction of vector elements directly. */
8484 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8485 machine_mode vmode;
8486 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8487 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8488 group_size).exists (&vmode)
8489 || (convert_optab_handler (vec_extract_optab,
8490 TYPE_MODE (vectype), vmode)
8491 == CODE_FOR_nothing))
8492 {
8493 /* Try to avoid emitting an extract of vector elements
8494 by performing the extracts using an integer type of the
8495 same size, extracting from a vector of those and then
8496 re-interpreting it as the original vector type if
8497 supported. */
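/* For example (purely illustrative, assuming the target has the
   relevant modes): storing SLP groups of two floats out of a V4SF
   can be done by viewing the V4SF as a V2DI and extracting DImode
   pieces, so each group is written with a single 64-bit store.  */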
8498 unsigned lsize
8499 = group_size * GET_MODE_BITSIZE (elmode);
8500 unsigned int lnunits = const_nunits / group_size;
8501 /* If we can't construct such a vector, fall back to
8502 element extracts from the original vector type and
8503 element-size stores. */
8504 if (int_mode_for_size (lsize, 0).exists (&elmode)
8505 && VECTOR_MODE_P (TYPE_MODE (vectype))
8506 && related_vector_mode (TYPE_MODE (vectype), elmode,
8507 lnunits).exists (&vmode)
8508 && (convert_optab_handler (vec_extract_optab,
8509 vmode, elmode)
8510 != CODE_FOR_nothing))
8511 {
8512 nstores = lnunits;
8513 lnel = group_size;
8514 ltype = build_nonstandard_integer_type (lsize, 1);
8515 lvectype = build_vector_type (ltype, nstores);
8516 }
8517 /* Else fall back to vector extraction anyway.
8518 Fewer stores are more important than avoiding spilling
8519 of the vector we extract from. Compared to the
8520 construction case in vectorizable_load no store-forwarding
8521 issue exists here for reasonable archs. */
8522 }
8523 }
8524 else if (group_size >= const_nunits
8525 && group_size % const_nunits == 0)
8526 {
8527 int mis_align = dr_misalignment (first_dr_info, vectype);
8528 dr_alignment_support dr_align
8529 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8530 mis_align);
8531 if (dr_align == dr_aligned
8532 || dr_align == dr_unaligned_supported)
8533 {
8534 nstores = 1;
8535 lnel = const_nunits;
8536 ltype = vectype;
8537 lvectype = vectype;
8538 alignment_support_scheme = dr_align;
8539 misalignment = mis_align;
8540 }
8541 }
8542 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8543 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8544 }
8545
8546 if (!costing_p)
8547 {
8548 ivstep = stride_step;
8549 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8550 build_int_cst (TREE_TYPE (ivstep), vf));
8551
8552 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8553
8554 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8555 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8556 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8557 insert_after, &offvar, NULL);
8558 incr = gsi_stmt (incr_gsi);
8559
8560 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8561 }
8562
8563 alias_off = build_int_cst (ref_type, 0);
8564 stmt_vec_info next_stmt_info = first_stmt_info;
8565 auto_vec<tree> vec_oprnds;
8566 /* For costing some adjacent vector stores, we'd like to cost them
8567 once with their total number instead of costing each one by one. */
8568 unsigned int n_adjacent_stores = 0;
8569 for (g = 0; g < group_size; g++)
8570 {
8571 running_off = offvar;
8572 if (!costing_p)
8573 {
8574 if (g)
8575 {
8576 tree size = TYPE_SIZE_UNIT (ltype);
8577 tree pos
8578 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8579 tree newoff = copy_ssa_name (running_off, NULL);
8580 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8581 running_off, pos);
8582 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8583 running_off = newoff;
8584 }
8585 }
8586 if (!slp)
8587 op = vect_get_store_rhs (next_stmt_info);
8588 if (!costing_p)
8589 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8590 &vec_oprnds);
8591 else
8592 update_prologue_cost (&prologue_cost, op);
8593 unsigned int group_el = 0;
8594 unsigned HOST_WIDE_INT
8595 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
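/* GROUP_EL tracks the element position within the scalar group; each
   piece is stored at byte offset GROUP_EL * ELSZ from RUNNING_OFF,
   and RUNNING_OFF is bumped by STRIDE_STEP after each full group
   (or after every store in the non-SLP case).  */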
8596 for (j = 0; j < ncopies; j++)
8597 {
8598 if (!costing_p)
8599 {
8600 vec_oprnd = vec_oprnds[j];
8601 /* Pun the vector to extract from if necessary. */
8602 if (lvectype != vectype)
8603 {
8604 tree tem = make_ssa_name (lvectype);
8605 tree cvt
8606 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8607 gimple *pun = gimple_build_assign (tem, cvt);
8608 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8609 vec_oprnd = tem;
8610 }
8611 }
8612 for (i = 0; i < nstores; i++)
8613 {
8614 if (costing_p)
8615 {
8616 /* We only need vector extraction when there is more
8617 than one store. */
8618 if (nstores > 1)
8619 inside_cost
8620 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8621 stmt_info, 0, vect_body);
8622 /* Treat a single-lane vector type store as a scalar
8623 store to avoid an ICE like PR110776. */
8624 if (VECTOR_TYPE_P (ltype)
8625 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8626 n_adjacent_stores++;
8627 else
8628 inside_cost
8629 += record_stmt_cost (cost_vec, 1, scalar_store,
8630 stmt_info, 0, vect_body);
8631 continue;
8632 }
8633 tree newref, newoff;
8634 gimple *incr, *assign;
8635 tree size = TYPE_SIZE (ltype);
8636 /* Extract the i'th component. */
8637 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8638 bitsize_int (i), size);
8639 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8640 size, pos);
8641
8642 elem = force_gimple_operand_gsi (gsi, elem, true,
8643 NULL_TREE, true,
8644 GSI_SAME_STMT);
8645
8646 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8647 group_el * elsz);
8648 newref = build2 (MEM_REF, ltype,
8649 running_off, this_off);
8650 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8651
8652 /* And store it to *running_off. */
8653 assign = gimple_build_assign (newref, elem);
8654 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8655
8656 group_el += lnel;
8657 if (! slp
8658 || group_el == group_size)
8659 {
8660 newoff = copy_ssa_name (running_off, NULL);
8661 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8662 running_off, stride_step);
8663 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8664
8665 running_off = newoff;
8666 group_el = 0;
8667 }
8668 if (g == group_size - 1
8669 && !slp)
8670 {
8671 if (j == 0 && i == 0)
8672 *vec_stmt = assign;
8673 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8674 }
8675 }
8676 }
8677 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8678 vec_oprnds.truncate (0);
8679 if (slp)
8680 break;
8681 }
8682
8683 if (costing_p)
8684 {
8685 if (n_adjacent_stores > 0)
8686 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8687 alignment_support_scheme, misalignment,
8688 &inside_cost, cost_vec);
8689 if (dump_enabled_p ())
8690 dump_printf_loc (MSG_NOTE, vect_location,
8691 "vect_model_store_cost: inside_cost = %d, "
8692 "prologue_cost = %d .\n",
8693 inside_cost, prologue_cost);
8694 }
8695
8696 return true;
8697 }
8698
8699 gcc_assert (alignment_support_scheme);
8700 vec_loop_masks *loop_masks
8701 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8702 ? &LOOP_VINFO_MASKS (loop_vinfo)
8703 : NULL);
8704 vec_loop_lens *loop_lens
8705 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8706 ? &LOOP_VINFO_LENS (loop_vinfo)
8707 : NULL);
8708
8709 /* Both vect_transform_stmt and vect_analyze_stmt reach this point,
8710 but there is a difference: we cannot enable both lens and masks
8711 during the transform, although that is allowed during analysis.
8712 We shouldn't go with the length-based approach if fully masked. */
8713 if (cost_vec == NULL)
8714 /* The cost_vec is NULL during the transform. */
8715 gcc_assert ((!loop_lens || !loop_masks));
8716
8717 /* Targets with store-lane instructions must not require explicit
8718 realignment. vect_supportable_dr_alignment always returns either
8719 dr_aligned or dr_unaligned_supported for masked operations. */
8720 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8721 && !mask
8722 && !loop_masks)
8723 || alignment_support_scheme == dr_aligned
8724 || alignment_support_scheme == dr_unaligned_supported);
8725
8726 tree offset = NULL_TREE;
8727 if (!known_eq (poffset, 0))
8728 offset = size_int (poffset);
8729
8730 tree bump;
8731 tree vec_offset = NULL_TREE;
8732 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8733 {
8734 aggr_type = NULL_TREE;
8735 bump = NULL_TREE;
8736 }
8737 else if (memory_access_type == VMAT_GATHER_SCATTER)
8738 {
8739 aggr_type = elem_type;
8740 if (!costing_p)
8741 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8742 &bump, &vec_offset, loop_lens);
8743 }
8744 else
8745 {
8746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8747 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8748 else
8749 aggr_type = vectype;
8750 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8751 memory_access_type, loop_lens);
8752 }
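/* AGGR_TYPE is the type written by a single vectorized access (an
   array of VEC_NUM * NUNITS elements for store-lanes) and BUMP the
   increment applied to the data pointer between two accesses, when a
   data pointer is used at all.  */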
8753
8754 if (mask && !costing_p)
8755 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8756
8757 /* In case the vectorization factor (VF) is bigger than the number
8758 of elements that we can fit in a vectype (nunits), we have to generate
8759 more than one vector stmt - i.e. we need to "unroll" the
8760 vector stmt by a factor VF/nunits. */
8761
8762 /* In case of interleaving (non-unit grouped access):
8763
8764 S1: &base + 2 = x2
8765 S2: &base = x0
8766 S3: &base + 1 = x1
8767 S4: &base + 3 = x3
8768
8769 We create vectorized stores starting from the base address (the access of
8770 the first stmt in the chain, S2 in the above example) when the last store
8771 stmt of the chain (S4) is reached:
8772
8773 VS1: &base = vx2
8774 VS2: &base + vec_size*1 = vx0
8775 VS3: &base + vec_size*2 = vx1
8776 VS4: &base + vec_size*3 = vx3
8777
8778 Then permutation statements are generated:
8779
8780 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8781 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8782 ...
8783
8784 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8785 (the order of the data-refs in the output of vect_permute_store_chain
8786 corresponds to the order of scalar stmts in the interleaving chain - see
8787 the documentation of vect_permute_store_chain()).
8788
8789 In case of both multiple types and interleaving, above vector stores and
8790 permutation stmts are created for every copy. The result vector stmts are
8791 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8792 STMT_VINFO_RELATED_STMT for the next copies.
8793 */
8794
8795 auto_vec<tree> dr_chain (group_size);
8796 auto_vec<tree> vec_masks;
8797 tree vec_mask = NULL;
8798 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8799 for (i = 0; i < group_size; i++)
8800 gvec_oprnds.quick_push (new auto_vec<tree> ());
8801
8802 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8803 {
8804 gcc_assert (!slp && grouped_store);
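/* With store-lanes the whole interleaved group is written by a single
   .STORE_LANES-style internal call per copy: the per-statement vectors
   are collected into VEC_ARRAY and the instruction itself performs the
   interleaving, so no vect_permute_store_chain is needed here.  */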
8805 unsigned inside_cost = 0, prologue_cost = 0;
8806 /* For costing some adjacent vector stores, we'd like to cost them
8807 once with their total number instead of costing each one by one. */
8808 unsigned int n_adjacent_stores = 0;
8809 for (j = 0; j < ncopies; j++)
8810 {
8811 gimple *new_stmt;
8812 if (j == 0)
8813 {
8814 /* For interleaved stores we collect vectorized defs for all
8815 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8816 to fill the vector array passed to the STORE_LANES call. */
8817 stmt_vec_info next_stmt_info = first_stmt_info;
8818 for (i = 0; i < group_size; i++)
8819 {
8820 /* Since gaps are not supported for interleaved stores,
8821 DR_GROUP_SIZE is the exact number of stmts in the
8822 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8823 op = vect_get_store_rhs (next_stmt_info);
8824 if (costing_p)
8825 update_prologue_cost (&prologue_cost, op);
8826 else
8827 {
8828 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8829 ncopies, op,
8830 gvec_oprnds[i]);
8831 vec_oprnd = (*gvec_oprnds[i])[0];
8832 dr_chain.quick_push (vec_oprnd);
8833 }
8834 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8835 }
8836
8837 if (!costing_p)
8838 {
8839 if (mask)
8840 {
8841 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8842 mask, &vec_masks,
8843 mask_vectype);
8844 vec_mask = vec_masks[0];
8845 }
8846
8847 /* We should have caught mismatched types earlier. */
8848 gcc_assert (
8849 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8850 dataref_ptr
8851 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8852 aggr_type, NULL, offset, &dummy,
8853 gsi, &ptr_incr, false, bump);
8854 }
8855 }
8856 else if (!costing_p)
8857 {
8858 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8859 /* DR_CHAIN is then used to fill the vector
8860 array passed to the STORE_LANES call. */
8861 for (i = 0; i < group_size; i++)
8862 {
8863 vec_oprnd = (*gvec_oprnds[i])[j];
8864 dr_chain[i] = vec_oprnd;
8865 }
8866 if (mask)
8867 vec_mask = vec_masks[j];
8868 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8869 stmt_info, bump);
8870 }
8871
8872 if (costing_p)
8873 {
8874 n_adjacent_stores += vec_num;
8875 continue;
8876 }
8877
8878 /* Get an array into which we can store the individual vectors. */
8879 tree vec_array = create_vector_array (vectype, vec_num);
8880
8881 /* Invalidate the current contents of VEC_ARRAY. This should
8882 become an RTL clobber too, which prevents the vector registers
8883 from being upward-exposed. */
8884 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8885
8886 /* Store the individual vectors into the array. */
8887 for (i = 0; i < vec_num; i++)
8888 {
8889 vec_oprnd = dr_chain[i];
8890 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8891 i);
8892 }
8893
8894 tree final_mask = NULL;
8895 tree final_len = NULL;
8896 tree bias = NULL;
8897 if (loop_masks)
8898 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8899 ncopies, vectype, j);
8900 if (vec_mask)
8901 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8902 vec_mask, gsi);
8903
8904 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8905 {
8906 if (loop_lens)
8907 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8908 ncopies, vectype, j, 1);
8909 else
8910 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8911 signed char biasval
8912 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8913 bias = build_int_cst (intQI_type_node, biasval);
8914 if (!final_mask)
8915 {
8916 mask_vectype = truth_type_for (vectype);
8917 final_mask = build_minus_one_cst (mask_vectype);
8918 }
8919 }
8920
8921 gcall *call;
8922 if (final_len && final_mask)
8923 {
8924 /* Emit:
8925 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8926 LEN, BIAS, VEC_ARRAY). */
8927 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8928 tree alias_ptr = build_int_cst (ref_type, align);
8929 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8930 dataref_ptr, alias_ptr,
8931 final_mask, final_len, bias,
8932 vec_array);
8933 }
8934 else if (final_mask)
8935 {
8936 /* Emit:
8937 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8938 VEC_ARRAY). */
8939 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8940 tree alias_ptr = build_int_cst (ref_type, align);
8941 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8942 dataref_ptr, alias_ptr,
8943 final_mask, vec_array);
8944 }
8945 else
8946 {
8947 /* Emit:
8948 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8949 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8950 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8951 gimple_call_set_lhs (call, data_ref);
8952 }
8953 gimple_call_set_nothrow (call, true);
8954 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8955 new_stmt = call;
8956
8957 /* Record that VEC_ARRAY is now dead. */
8958 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8959 if (j == 0)
8960 *vec_stmt = new_stmt;
8961 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8962 }
8963
8964 if (costing_p)
8965 {
8966 if (n_adjacent_stores > 0)
8967 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8968 alignment_support_scheme, misalignment,
8969 &inside_cost, cost_vec);
8970 if (dump_enabled_p ())
8971 dump_printf_loc (MSG_NOTE, vect_location,
8972 "vect_model_store_cost: inside_cost = %d, "
8973 "prologue_cost = %d .\n",
8974 inside_cost, prologue_cost);
8975 }
8976
8977 return true;
8978 }
8979
8980 if (memory_access_type == VMAT_GATHER_SCATTER)
8981 {
8982 gcc_assert (!grouped_store);
8983 auto_vec<tree> vec_offsets;
8984 unsigned int inside_cost = 0, prologue_cost = 0;
8985 for (j = 0; j < ncopies; j++)
8986 {
8987 gimple *new_stmt;
8988 if (j == 0)
8989 {
8990 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8991 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8992 stmt_info, 0, vect_prologue);
8993 else if (!costing_p)
8994 {
8995 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8996 DR_CHAIN is of size 1. */
8997 gcc_assert (group_size == 1);
8998 if (slp_node)
8999 vect_get_slp_defs (op_node, gvec_oprnds[0]);
9000 else
9001 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9002 ncopies, op, gvec_oprnds[0]);
9003 if (mask)
9004 {
9005 if (slp_node)
9006 vect_get_slp_defs (mask_node, &vec_masks);
9007 else
9008 vect_get_vec_defs_for_operand (vinfo, stmt_info,
9009 ncopies,
9010 mask, &vec_masks,
9011 mask_vectype);
9012 }
9013
9014 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9015 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9016 slp_node, &gs_info,
9017 &dataref_ptr, &vec_offsets);
9018 else
9019 dataref_ptr
9020 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9021 aggr_type, NULL, offset,
9022 &dummy, gsi, &ptr_incr, false,
9023 bump);
9024 }
9025 }
9026 else if (!costing_p)
9027 {
9028 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9029 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9030 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9031 gsi, stmt_info, bump);
9032 }
9033
9034 new_stmt = NULL;
9035 for (i = 0; i < vec_num; ++i)
9036 {
9037 if (!costing_p)
9038 {
9039 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
9040 if (mask)
9041 vec_mask = vec_masks[vec_num * j + i];
9042 /* We should have caught mismatched types earlier. */
9043 gcc_assert (useless_type_conversion_p (vectype,
9044 TREE_TYPE (vec_oprnd)));
9045 }
9046 unsigned HOST_WIDE_INT align;
9047 tree final_mask = NULL_TREE;
9048 tree final_len = NULL_TREE;
9049 tree bias = NULL_TREE;
9050 if (!costing_p)
9051 {
9052 if (loop_masks)
9053 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
9054 loop_masks, ncopies,
9055 vectype, j);
9056 if (vec_mask)
9057 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9058 final_mask, vec_mask, gsi);
9059 }
9060
9061 if (gs_info.ifn != IFN_LAST)
9062 {
9063 if (costing_p)
9064 {
9065 unsigned int cnunits = vect_nunits_for_cost (vectype);
9066 inside_cost
9067 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9068 stmt_info, 0, vect_body);
9069 continue;
9070 }
9071
9072 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9073 vec_offset = vec_offsets[vec_num * j + i];
9074 tree scale = size_int (gs_info.scale);
9075
9076 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9077 {
9078 if (loop_lens)
9079 final_len = vect_get_loop_len (loop_vinfo, gsi,
9080 loop_lens, ncopies,
9081 vectype, j, 1);
9082 else
9083 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9084 signed char biasval
9085 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9086 bias = build_int_cst (intQI_type_node, biasval);
9087 if (!final_mask)
9088 {
9089 mask_vectype = truth_type_for (vectype);
9090 final_mask = build_minus_one_cst (mask_vectype);
9091 }
9092 }
9093
9094 gcall *call;
9095 if (final_len && final_mask)
9096 call = gimple_build_call_internal
9097 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9098 vec_offset, scale, vec_oprnd, final_mask,
9099 final_len, bias);
9100 else if (final_mask)
9101 call = gimple_build_call_internal
9102 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9103 vec_offset, scale, vec_oprnd, final_mask);
9104 else
9105 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9106 dataref_ptr, vec_offset,
9107 scale, vec_oprnd);
9108 gimple_call_set_nothrow (call, true);
9109 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9110 new_stmt = call;
9111 }
9112 else if (gs_info.decl)
9113 {
9114 /* The builtin decls path for scatter is legacy, x86 only. */
9115 gcc_assert (nunits.is_constant ()
9116 && (!final_mask
9117 || SCALAR_INT_MODE_P
9118 (TYPE_MODE (TREE_TYPE (final_mask)))));
9119 if (costing_p)
9120 {
9121 unsigned int cnunits = vect_nunits_for_cost (vectype);
9122 inside_cost
9123 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9124 stmt_info, 0, vect_body);
9125 continue;
9126 }
9127 poly_uint64 offset_nunits
9128 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9129 if (known_eq (nunits, offset_nunits))
9130 {
9131 new_stmt = vect_build_one_scatter_store_call
9132 (vinfo, stmt_info, gsi, &gs_info,
9133 dataref_ptr, vec_offsets[vec_num * j + i],
9134 vec_oprnd, final_mask);
9135 vect_finish_stmt_generation (vinfo, stmt_info,
9136 new_stmt, gsi);
9137 }
9138 else if (known_eq (nunits, offset_nunits * 2))
9139 {
9140 /* We have an offset vector with half the number of
9141 lanes but the builtins will store full vectype
9142 data from the lower lanes. */
9143 new_stmt = vect_build_one_scatter_store_call
9144 (vinfo, stmt_info, gsi, &gs_info,
9145 dataref_ptr,
9146 vec_offsets[2 * vec_num * j + 2 * i],
9147 vec_oprnd, final_mask);
9148 vect_finish_stmt_generation (vinfo, stmt_info,
9149 new_stmt, gsi);
9150 int count = nunits.to_constant ();
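/* Build a selector moving the high half of VEC_OPRND into the low
   lanes, e.g. { 2, 3, 2, 3 } for COUNT == 4, so that the second
   builtin call below stores the remaining data lanes.  */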
9151 vec_perm_builder sel (count, count, 1);
9152 sel.quick_grow (count);
9153 for (int i = 0; i < count; ++i)
9154 sel[i] = i | (count / 2);
9155 vec_perm_indices indices (sel, 2, count);
9156 tree perm_mask
9157 = vect_gen_perm_mask_checked (vectype, indices);
9158 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9159 vec_oprnd, vec_oprnd,
9160 perm_mask);
9161 vec_oprnd = make_ssa_name (vectype);
9162 gimple_set_lhs (new_stmt, vec_oprnd);
9163 vect_finish_stmt_generation (vinfo, stmt_info,
9164 new_stmt, gsi);
9165 if (final_mask)
9166 {
9167 new_stmt = gimple_build_assign (NULL_TREE,
9168 VEC_UNPACK_HI_EXPR,
9169 final_mask);
9170 final_mask = make_ssa_name
9171 (truth_type_for (gs_info.offset_vectype));
9172 gimple_set_lhs (new_stmt, final_mask);
9173 vect_finish_stmt_generation (vinfo, stmt_info,
9174 new_stmt, gsi);
9175 }
9176 new_stmt = vect_build_one_scatter_store_call
9177 (vinfo, stmt_info, gsi, &gs_info,
9178 dataref_ptr,
9179 vec_offsets[2 * vec_num * j + 2 * i + 1],
9180 vec_oprnd, final_mask);
9181 vect_finish_stmt_generation (vinfo, stmt_info,
9182 new_stmt, gsi);
9183 }
9184 else if (known_eq (nunits * 2, offset_nunits))
9185 {
9186 /* We have an offset vector with double the number of
9187 lanes. Select the low/high part accordingly. */
9188 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9189 if ((vec_num * j + i) & 1)
9190 {
9191 int count = offset_nunits.to_constant ();
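/* For the odd halves build a selector that moves the high half of
   VEC_OFFSET into the low lanes (e.g. { 4, 5, 6, 7, ... } for
   COUNT == 8) so the builtin sees this half's offsets.  */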
9192 vec_perm_builder sel (count, count, 1);
9193 sel.quick_grow (count);
9194 for (int i = 0; i < count; ++i)
9195 sel[i] = i | (count / 2);
9196 vec_perm_indices indices (sel, 2, count);
9197 tree perm_mask = vect_gen_perm_mask_checked
9198 (TREE_TYPE (vec_offset), indices);
9199 new_stmt = gimple_build_assign (NULL_TREE,
9200 VEC_PERM_EXPR,
9201 vec_offset,
9202 vec_offset,
9203 perm_mask);
9204 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9205 gimple_set_lhs (new_stmt, vec_offset);
9206 vect_finish_stmt_generation (vinfo, stmt_info,
9207 new_stmt, gsi);
9208 }
9209 new_stmt = vect_build_one_scatter_store_call
9210 (vinfo, stmt_info, gsi, &gs_info,
9211 dataref_ptr, vec_offset,
9212 vec_oprnd, final_mask);
9213 vect_finish_stmt_generation (vinfo, stmt_info,
9214 new_stmt, gsi);
9215 }
9216 else
9217 gcc_unreachable ();
9218 }
9219 else
9220 {
9221 /* Emulated scatter. */
9222 gcc_assert (!final_mask);
9223 if (costing_p)
9224 {
9225 unsigned int cnunits = vect_nunits_for_cost (vectype);
9226 /* For an emulated scatter, N offset vector element extracts
9227 (we assume the scalar scaling and the ptr + offset add are
9228 consumed by the store). */
9229 inside_cost
9230 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9231 stmt_info, 0, vect_body);
9232 /* N scalar stores plus extracting the elements. */
9233 inside_cost
9234 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9235 stmt_info, 0, vect_body);
9236 inside_cost
9237 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9238 stmt_info, 0, vect_body);
9239 continue;
9240 }
9241
9242 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9243 unsigned HOST_WIDE_INT const_offset_nunits
9244 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9245 vec<constructor_elt, va_gc> *ctor_elts;
9246 vec_alloc (ctor_elts, const_nunits);
9247 gimple_seq stmts = NULL;
9248 tree elt_type = TREE_TYPE (vectype);
9249 unsigned HOST_WIDE_INT elt_size
9250 = tree_to_uhwi (TYPE_SIZE (elt_type));
9251 /* We support offset vectors with more elements
9252 than the data vector for now. */
9253 unsigned HOST_WIDE_INT factor
9254 = const_offset_nunits / const_nunits;
9255 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9256 unsigned elt_offset
9257 = ((vec_num * j + i) % factor) * const_nunits;
9258 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9259 tree scale = size_int (gs_info.scale);
9260 align = get_object_alignment (DR_REF (first_dr_info->dr));
9261 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
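/* For each lane K emit a scalar store of lane K of VEC_OPRND to the
   address DATAREF_PTR + OFFSET[K + ELT_OFFSET] * SCALE.  */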
9262 for (unsigned k = 0; k < const_nunits; ++k)
9263 {
9264 /* Compute the offsetted pointer. */
9265 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9266 bitsize_int (k + elt_offset));
9267 tree idx
9268 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9269 vec_offset, TYPE_SIZE (idx_type), boff);
9270 idx = gimple_convert (&stmts, sizetype, idx);
9271 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9272 idx, scale);
9273 tree ptr
9274 = gimple_build (&stmts, PLUS_EXPR,
9275 TREE_TYPE (dataref_ptr),
9276 dataref_ptr, idx);
9277 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9278 /* Extract the element to be stored. */
9279 tree elt
9280 = gimple_build (&stmts, BIT_FIELD_REF,
9281 TREE_TYPE (vectype),
9282 vec_oprnd, TYPE_SIZE (elt_type),
9283 bitsize_int (k * elt_size));
9284 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9285 stmts = NULL;
9286 tree ref
9287 = build2 (MEM_REF, ltype, ptr,
9288 build_int_cst (ref_type, 0));
9289 new_stmt = gimple_build_assign (ref, elt);
9290 vect_finish_stmt_generation (vinfo, stmt_info,
9291 new_stmt, gsi);
9292 }
9293 if (slp)
9294 slp_node->push_vec_def (new_stmt);
9295 }
9296 }
9297 if (!slp && !costing_p)
9298 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9299 }
9300
9301 if (!slp && !costing_p)
9302 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9303
9304 if (costing_p && dump_enabled_p ())
9305 dump_printf_loc (MSG_NOTE, vect_location,
9306 "vect_model_store_cost: inside_cost = %d, "
9307 "prologue_cost = %d .\n",
9308 inside_cost, prologue_cost);
9309
9310 return true;
9311 }
9312
9313 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9314 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9315 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9316 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
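/* What remains are the contiguous access kinds: emit one (possibly
   masked or length-controlled) vector store per copy, interleaving
   grouped data with vect_permute_store_chain and permuting it with a
   reverse mask for VMAT_CONTIGUOUS_REVERSE.  */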
9317
9318 unsigned inside_cost = 0, prologue_cost = 0;
9319 /* For costing some adjacent vector stores, we'd like to cost them
9320 once with their total number instead of costing each one by one. */
9321 unsigned int n_adjacent_stores = 0;
9322 auto_vec<tree> result_chain (group_size);
9323 auto_vec<tree, 1> vec_oprnds;
9324 for (j = 0; j < ncopies; j++)
9325 {
9326 gimple *new_stmt;
9327 if (j == 0)
9328 {
9329 if (slp && !costing_p)
9330 {
9331 /* Get vectorized arguments for SLP_NODE. */
9332 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9333 &vec_oprnds, mask, &vec_masks);
9334 vec_oprnd = vec_oprnds[0];
9335 if (mask)
9336 vec_mask = vec_masks[0];
9337 }
9338 else
9339 {
9340 /* For interleaved stores we collect vectorized defs for all the
9341 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9342 input to vect_permute_store_chain().
9343
9344 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9345 is of size 1. */
9346 stmt_vec_info next_stmt_info = first_stmt_info;
9347 for (i = 0; i < group_size; i++)
9348 {
9349 /* Since gaps are not supported for interleaved stores,
9350 DR_GROUP_SIZE is the exact number of stmts in the chain.
9351 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9352 there is no interleaving, DR_GROUP_SIZE is 1,
9353 and only one iteration of the loop will be executed. */
9354 op = vect_get_store_rhs (next_stmt_info);
9355 if (costing_p)
9356 update_prologue_cost (&prologue_cost, op);
9357 else
9358 {
9359 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9360 ncopies, op,
9361 gvec_oprnds[i]);
9362 vec_oprnd = (*gvec_oprnds[i])[0];
9363 dr_chain.quick_push (vec_oprnd);
9364 }
9365 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9366 }
9367 if (mask && !costing_p)
9368 {
9369 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9370 mask, &vec_masks,
9371 mask_vectype);
9372 vec_mask = vec_masks[0];
9373 }
9374 }
9375
9376 /* We should have caught mismatched types earlier. */
9377 gcc_assert (costing_p
9378 || useless_type_conversion_p (vectype,
9379 TREE_TYPE (vec_oprnd)));
9380 bool simd_lane_access_p
9381 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9382 if (!costing_p
9383 && simd_lane_access_p
9384 && !loop_masks
9385 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9386 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9387 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9388 && integer_zerop (DR_INIT (first_dr_info->dr))
9389 && alias_sets_conflict_p (get_alias_set (aggr_type),
9390 get_alias_set (TREE_TYPE (ref_type))))
9391 {
9392 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9393 dataref_offset = build_int_cst (ref_type, 0);
9394 }
9395 else if (!costing_p)
9396 dataref_ptr
9397 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9398 simd_lane_access_p ? loop : NULL,
9399 offset, &dummy, gsi, &ptr_incr,
9400 simd_lane_access_p, bump);
9401 }
9402 else if (!costing_p)
9403 {
9404 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9405 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9406 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9407 of size 1. */
9408 for (i = 0; i < group_size; i++)
9409 {
9410 vec_oprnd = (*gvec_oprnds[i])[j];
9411 dr_chain[i] = vec_oprnd;
9412 }
9413 if (mask)
9414 vec_mask = vec_masks[j];
9415 if (dataref_offset)
9416 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9417 else
9418 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9419 stmt_info, bump);
9420 }
9421
9422 new_stmt = NULL;
9423 if (grouped_store)
9424 {
9425 /* Permute. */
9426 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9427 if (costing_p)
9428 {
9429 int group_size = DR_GROUP_SIZE (first_stmt_info);
9430 int nstmts = ceil_log2 (group_size) * group_size;
9431 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9432 stmt_info, 0, vect_body);
9433 if (dump_enabled_p ())
9434 dump_printf_loc (MSG_NOTE, vect_location,
9435 "vect_model_store_cost: "
9436 "strided group_size = %d .\n",
9437 group_size);
9438 }
9439 else
9440 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9441 gsi, &result_chain);
9442 }
9443
9444 stmt_vec_info next_stmt_info = first_stmt_info;
9445 for (i = 0; i < vec_num; i++)
9446 {
9447 if (!costing_p)
9448 {
9449 if (slp)
9450 vec_oprnd = vec_oprnds[i];
9451 else if (grouped_store)
9452 /* For grouped stores vectorized defs are interleaved in
9453 vect_permute_store_chain(). */
9454 vec_oprnd = result_chain[i];
9455 }
9456
9457 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9458 {
9459 if (costing_p)
9460 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9461 stmt_info, 0, vect_body);
9462 else
9463 {
9464 tree perm_mask = perm_mask_for_reverse (vectype);
9465 tree perm_dest = vect_create_destination_var (
9466 vect_get_store_rhs (stmt_info), vectype);
9467 tree new_temp = make_ssa_name (perm_dest);
9468
9469 /* Generate the permute statement. */
9470 gimple *perm_stmt
9471 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9472 vec_oprnd, perm_mask);
9473 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9474 gsi);
9475
9476 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9477 vec_oprnd = new_temp;
9478 }
9479 }
9480
9481 if (costing_p)
9482 {
9483 n_adjacent_stores++;
9484
9485 if (!slp)
9486 {
9487 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9488 if (!next_stmt_info)
9489 break;
9490 }
9491
9492 continue;
9493 }
9494
9495 tree final_mask = NULL_TREE;
9496 tree final_len = NULL_TREE;
9497 tree bias = NULL_TREE;
9498 if (loop_masks)
9499 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9500 vec_num * ncopies, vectype,
9501 vec_num * j + i);
9502 if (slp && vec_mask)
9503 vec_mask = vec_masks[i];
9504 if (vec_mask)
9505 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9506 vec_mask, gsi);
9507
9508 if (i > 0)
9509 /* Bump the vector pointer. */
9510 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9511 stmt_info, bump);
9512
9513 unsigned misalign;
9514 unsigned HOST_WIDE_INT align;
9515 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9516 if (alignment_support_scheme == dr_aligned)
9517 misalign = 0;
9518 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9519 {
9520 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9521 misalign = 0;
9522 }
9523 else
9524 misalign = misalignment;
9525 if (dataref_offset == NULL_TREE
9526 && TREE_CODE (dataref_ptr) == SSA_NAME)
9527 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9528 misalign);
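/* The alignment guaranteed at runtime is the lowest set bit of
   MISALIGN | ALIGN; e.g. ALIGN == 16 with MISALIGN == 4 gives a
   guaranteed alignment of 4 bytes.  */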
9529 align = least_bit_hwi (misalign | align);
9530
9531 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9532 machine_mode vmode = TYPE_MODE (vectype);
9533 machine_mode new_vmode = vmode;
9534 internal_fn partial_ifn = IFN_LAST;
9535 if (loop_lens)
9536 {
9537 opt_machine_mode new_ovmode
9538 = get_len_load_store_mode (vmode, false, &partial_ifn);
9539 new_vmode = new_ovmode.require ();
9540 unsigned factor
9541 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9542 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9543 vec_num * ncopies, vectype,
9544 vec_num * j + i, factor);
9545 }
9546 else if (final_mask)
9547 {
9548 if (!can_vec_mask_load_store_p (
9549 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9550 &partial_ifn))
9551 gcc_unreachable ();
9552 }
9553
9554 if (partial_ifn == IFN_MASK_LEN_STORE)
9555 {
9556 if (!final_len)
9557 {
9558 /* Pass VF value to 'len' argument of
9559 MASK_LEN_STORE if LOOP_LENS is invalid. */
9560 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9561 }
9562 if (!final_mask)
9563 {
9564 /* Pass all ones value to 'mask' argument of
9565 MASK_LEN_STORE if final_mask is invalid. */
9566 mask_vectype = truth_type_for (vectype);
9567 final_mask = build_minus_one_cst (mask_vectype);
9568 }
9569 }
9570 if (final_len)
9571 {
9572 signed char biasval
9573 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9574
9575 bias = build_int_cst (intQI_type_node, biasval);
9576 }
9577
9578 /* Arguments are ready. Create the new vector stmt. */
9579 if (final_len)
9580 {
9581 gcall *call;
9582 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9583 /* Need conversion if it's wrapped with VnQI. */
9584 if (vmode != new_vmode)
9585 {
9586 tree new_vtype
9587 = build_vector_type_for_mode (unsigned_intQI_type_node,
9588 new_vmode);
9589 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9590 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9591 gassign *new_stmt
9592 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9593 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9594 vec_oprnd = var;
9595 }
9596
9597 if (partial_ifn == IFN_MASK_LEN_STORE)
9598 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9599 dataref_ptr, ptr, final_mask,
9600 final_len, bias, vec_oprnd);
9601 else
9602 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9603 dataref_ptr, ptr, final_len,
9604 bias, vec_oprnd);
9605 gimple_call_set_nothrow (call, true);
9606 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9607 new_stmt = call;
9608 }
9609 else if (final_mask)
9610 {
9611 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9612 gcall *call
9613 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9614 ptr, final_mask, vec_oprnd);
9615 gimple_call_set_nothrow (call, true);
9616 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9617 new_stmt = call;
9618 }
9619 else
9620 {
9621 data_ref
9622 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9623 dataref_offset ? dataref_offset
9624 : build_int_cst (ref_type, 0));
9625 if (alignment_support_scheme == dr_aligned)
9626 ;
9627 else
9628 TREE_TYPE (data_ref)
9629 = build_aligned_type (TREE_TYPE (data_ref),
9630 align * BITS_PER_UNIT);
9631 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9632 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9633 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9634 }
9635
9636 if (slp)
9637 continue;
9638
9639 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9640 if (!next_stmt_info)
9641 break;
9642 }
9643 if (!slp && !costing_p)
9644 {
9645 if (j == 0)
9646 *vec_stmt = new_stmt;
9647 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9648 }
9649 }
9650
9651 if (costing_p)
9652 {
9653 if (n_adjacent_stores > 0)
9654 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9655 alignment_support_scheme, misalignment,
9656 &inside_cost, cost_vec);
9657
9658 /* When vectorizing a store into the function result, assign
9659 a penalty if the function returns in a multi-register location.
9660 In this case we assume we'll end up having to spill the
9661 vector result and do piecewise loads as a conservative estimate. */
9662 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9663 if (base
9664 && (TREE_CODE (base) == RESULT_DECL
9665 || (DECL_P (base) && cfun_returns (base)))
9666 && !aggregate_value_p (base, cfun->decl))
9667 {
9668 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9669 /* ??? Handle PARALLEL in some way. */
9670 if (REG_P (reg))
9671 {
9672 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9673 /* Assume that a single reg-reg move is possible and cheap,
9674 do not account for vector to gp register move cost. */
9675 if (nregs > 1)
9676 {
9677 /* Spill. */
9678 prologue_cost
9679 += record_stmt_cost (cost_vec, ncopies, vector_store,
9680 stmt_info, 0, vect_epilogue);
9681 /* Loads. */
9682 prologue_cost
9683 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9684 stmt_info, 0, vect_epilogue);
9685 }
9686 }
9687 }
9688 if (dump_enabled_p ())
9689 dump_printf_loc (MSG_NOTE, vect_location,
9690 "vect_model_store_cost: inside_cost = %d, "
9691 "prologue_cost = %d .\n",
9692 inside_cost, prologue_cost);
9693 }
9694
9695 return true;
9696 }
9697
9698 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9699 VECTOR_CST mask. No checks are made that the target platform supports the
9700 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9701 vect_gen_perm_mask_checked. */
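/* For example (purely illustrative), for a 4-element VECTYPE and
   SEL = { 0, 4, 1, 5 } the result is the VECTOR_CST { 0, 4, 1, 5 },
   usable as the selector operand of a VEC_PERM_EXPR that interleaves
   the low halves of two input vectors.  */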
9702
9703 tree
9704 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9705 {
9706 tree mask_type;
9707
9708 poly_uint64 nunits = sel.length ();
9709 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9710
9711 mask_type = build_vector_type (ssizetype, nunits);
9712 return vec_perm_indices_to_tree (mask_type, sel);
9713 }
9714
9715 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9716 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9717
9718 tree
9719 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9720 {
9721 machine_mode vmode = TYPE_MODE (vectype);
9722 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9723 return vect_gen_perm_mask_any (vectype, sel);
9724 }
9725
9726 /* Given vector variables X and Y that were generated for the scalar
9727 STMT_INFO, generate instructions to permute the vector elements of X and Y
9728 using the permutation mask MASK_VEC, insert them at *GSI and return the
9729 permuted vector variable. */
9730
9731 static tree
9732 permute_vec_elements (vec_info *vinfo,
9733 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9734 gimple_stmt_iterator *gsi)
9735 {
9736 tree vectype = TREE_TYPE (x);
9737 tree perm_dest, data_ref;
9738 gimple *perm_stmt;
9739
9740 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9741 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9742 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9743 else
9744 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9745 data_ref = make_ssa_name (perm_dest);
9746
9747 /* Generate the permute statement. */
9748 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9749 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9750
9751 return data_ref;
9752 }
9753
9754 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9755 inserting them on the loop's preheader edge. Returns true if we
9756 were successful in doing so (and thus STMT_INFO can be moved then),
9757 otherwise returns false. HOIST_P indicates whether we actually want to
9758 hoist the definitions of all SSA uses; it is false when only costing. */
9759
9760 static bool
9761 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9762 {
9763 ssa_op_iter i;
9764 tree op;
9765 bool any = false;
9766
9767 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9768 {
9769 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9770 if (!gimple_nop_p (def_stmt)
9771 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9772 {
9773 /* Make sure we don't need to recurse. While we could do
9774 so in simple cases, when there are more complex use webs
9775 we don't have an easy way to preserve stmt order to fulfil
9776 dependencies within them. */
9777 tree op2;
9778 ssa_op_iter i2;
9779 if (gimple_code (def_stmt) == GIMPLE_PHI)
9780 return false;
9781 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9782 {
9783 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9784 if (!gimple_nop_p (def_stmt2)
9785 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9786 return false;
9787 }
9788 any = true;
9789 }
9790 }
9791
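/* If no use is defined inside the loop there is nothing to hoist, and
   when only analyzing (HOIST_P is false) knowing that hoisting is
   possible is enough.  */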
9792 if (!any)
9793 return true;
9794
9795 if (!hoist_p)
9796 return true;
9797
9798 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9799 {
9800 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9801 if (!gimple_nop_p (def_stmt)
9802 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9803 {
9804 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9805 gsi_remove (&gsi, false);
9806 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9807 }
9808 }
9809
9810 return true;
9811 }
9812
9813 /* vectorizable_load.
9814
9815 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9816 that can be vectorized.
9817 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9818 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9819 Return true if STMT_INFO is vectorizable in this way. */
9820
9821 static bool
9822 vectorizable_load (vec_info *vinfo,
9823 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9824 gimple **vec_stmt, slp_tree slp_node,
9825 stmt_vector_for_cost *cost_vec)
9826 {
9827 tree scalar_dest;
9828 tree vec_dest = NULL;
9829 tree data_ref = NULL;
9830 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9831 class loop *loop = NULL;
9832 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9833 bool nested_in_vect_loop = false;
9834 tree elem_type;
9835 /* Avoid false positive uninitialized warning, see PR110652. */
9836 tree new_temp = NULL_TREE;
9837 machine_mode mode;
9838 tree dummy;
9839 tree dataref_ptr = NULL_TREE;
9840 tree dataref_offset = NULL_TREE;
9841 gimple *ptr_incr = NULL;
9842 int ncopies;
9843 int i, j;
9844 unsigned int group_size;
9845 poly_uint64 group_gap_adj;
9846 tree msq = NULL_TREE, lsq;
9847 tree realignment_token = NULL_TREE;
9848 gphi *phi = NULL;
9849 vec<tree> dr_chain = vNULL;
9850 bool grouped_load = false;
9851 stmt_vec_info first_stmt_info;
9852 stmt_vec_info first_stmt_info_for_drptr = NULL;
9853 bool compute_in_loop = false;
9854 class loop *at_loop;
9855 int vec_num;
9856 bool slp = (slp_node != NULL);
9857 bool slp_perm = false;
9858 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9859 poly_uint64 vf;
9860 tree aggr_type;
9861 gather_scatter_info gs_info;
9862 tree ref_type;
9863 enum vect_def_type mask_dt = vect_unknown_def_type;
9864
9865 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9866 return false;
9867
9868 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9869 && ! vec_stmt)
9870 return false;
9871
9872 if (!STMT_VINFO_DATA_REF (stmt_info))
9873 return false;
9874
9875 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9876 int mask_index = -1;
9877 slp_tree slp_op = NULL;
9878 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9879 {
9880 scalar_dest = gimple_assign_lhs (assign);
9881 if (TREE_CODE (scalar_dest) != SSA_NAME)
9882 return false;
9883
9884 tree_code code = gimple_assign_rhs_code (assign);
9885 if (code != ARRAY_REF
9886 && code != BIT_FIELD_REF
9887 && code != INDIRECT_REF
9888 && code != COMPONENT_REF
9889 && code != IMAGPART_EXPR
9890 && code != REALPART_EXPR
9891 && code != MEM_REF
9892 && TREE_CODE_CLASS (code) != tcc_declaration)
9893 return false;
9894 }
9895 else
9896 {
9897 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9898 if (!call || !gimple_call_internal_p (call))
9899 return false;
9900
9901 internal_fn ifn = gimple_call_internal_fn (call);
9902 if (!internal_load_fn_p (ifn))
9903 return false;
9904
9905 scalar_dest = gimple_call_lhs (call);
9906 if (!scalar_dest)
9907 return false;
9908
9909 mask_index = internal_fn_mask_index (ifn);
9910 if (mask_index >= 0 && slp_node)
9911 mask_index = vect_slp_child_index_for_operand
9912 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9913 if (mask_index >= 0
9914 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9915 &mask, &slp_op, &mask_dt, &mask_vectype))
9916 return false;
9917 }
9918
9919 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9920 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9921
9922 if (loop_vinfo)
9923 {
9924 loop = LOOP_VINFO_LOOP (loop_vinfo);
9925 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9926 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9927 }
9928 else
9929 vf = 1;
9930
9931 /* Multiple types in SLP are handled by creating the appropriate number of
9932 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9933 case of SLP. */
9934 if (slp)
9935 ncopies = 1;
9936 else
9937 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9938
9939 gcc_assert (ncopies >= 1);
9940
9941 /* FORNOW. This restriction should be relaxed. */
9942 if (nested_in_vect_loop && ncopies > 1)
9943 {
9944 if (dump_enabled_p ())
9945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9946 "multiple types in nested loop.\n");
9947 return false;
9948 }
9949
9950 /* Invalidate assumptions made by dependence analysis when vectorization
9951 on the unrolled body effectively re-orders stmts. */
9952 if (ncopies > 1
9953 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9954 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9955 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9956 {
9957 if (dump_enabled_p ())
9958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9959 "cannot perform implicit CSE when unrolling "
9960 "with negative dependence distance\n");
9961 return false;
9962 }
9963
9964 elem_type = TREE_TYPE (vectype);
9965 mode = TYPE_MODE (vectype);
9966
9967 /* FORNOW. In some cases we can vectorize even if the data type is not
9968 supported (e.g. data copies). */
9969 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9970 {
9971 if (dump_enabled_p ())
9972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9973 "Aligned load, but unsupported type.\n");
9974 return false;
9975 }
9976
9977 /* Check if the load is a part of an interleaving chain. */
9978 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9979 {
9980 grouped_load = true;
9981 /* FORNOW */
9982 gcc_assert (!nested_in_vect_loop);
9983 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9984
9985 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9986 group_size = DR_GROUP_SIZE (first_stmt_info);
9987
9988 /* Refuse non-SLP vectorization of SLP-only groups. */
9989 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9990 {
9991 if (dump_enabled_p ())
9992 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9993 "cannot vectorize load in non-SLP mode.\n");
9994 return false;
9995 }
9996
9997 /* Invalidate assumptions made by dependence analysis when vectorization
9998 on the unrolled body effectively re-orders stmts. */
9999 if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
10000 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
10001 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
10002 {
10003 if (dump_enabled_p ())
10004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10005 "cannot perform implicit CSE when performing "
10006 "group loads with negative dependence distance\n");
10007 return false;
10008 }
10009 }
10010 else
10011 group_size = 1;
10012
10013 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10014 {
10015 slp_perm = true;
10016
10017 if (!loop_vinfo)
10018 {
10019 /* In BB vectorization we may not actually use a loaded vector
10020 accessing elements in excess of DR_GROUP_SIZE. */
10021 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10022 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
10023 unsigned HOST_WIDE_INT nunits;
10024 unsigned j, k, maxk = 0;
10025 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
10026 if (k > maxk)
10027 maxk = k;
10028 tree vectype = SLP_TREE_VECTYPE (slp_node);
10029 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
10030 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
10031 {
10032 if (dump_enabled_p ())
10033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10034 "BB vectorization with gaps at the end of "
10035 "a load is not supported\n");
10036 return false;
10037 }
10038 }
10039
10040 auto_vec<tree> tem;
10041 unsigned n_perms;
10042 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10043 true, &n_perms))
10044 {
10045 if (dump_enabled_p ())
10046 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10047 vect_location,
10048 "unsupported load permutation\n");
10049 return false;
10050 }
10051 }
10052
10053 vect_memory_access_type memory_access_type;
10054 enum dr_alignment_support alignment_support_scheme;
10055 int misalignment;
10056 poly_int64 poffset;
10057 internal_fn lanes_ifn;
10058 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10059 ncopies, &memory_access_type, &poffset,
10060 &alignment_support_scheme, &misalignment, &gs_info,
10061 &lanes_ifn))
10062 return false;
10063
10064 if (mask)
10065 {
10066 if (memory_access_type == VMAT_CONTIGUOUS)
10067 {
10068 machine_mode vec_mode = TYPE_MODE (vectype);
10069 if (!VECTOR_MODE_P (vec_mode)
10070 || !can_vec_mask_load_store_p (vec_mode,
10071 TYPE_MODE (mask_vectype), true))
10072 return false;
10073 }
10074 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10075 && memory_access_type != VMAT_GATHER_SCATTER)
10076 {
10077 if (dump_enabled_p ())
10078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10079 "unsupported access type for masked load.\n");
10080 return false;
10081 }
10082 else if (memory_access_type == VMAT_GATHER_SCATTER
10083 && gs_info.ifn == IFN_LAST
10084 && !gs_info.decl)
10085 {
10086 if (dump_enabled_p ())
10087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10088 "unsupported masked emulated gather.\n");
10089 return false;
10090 }
10091 else if (memory_access_type == VMAT_ELEMENTWISE
10092 || memory_access_type == VMAT_STRIDED_SLP)
10093 {
10094 if (dump_enabled_p ())
10095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10096 "unsupported masked strided access.\n");
10097 return false;
10098 }
10099 }
10100
10101 bool costing_p = !vec_stmt;
10102
10103 if (costing_p) /* transformation not required. */
10104 {
10105 if (slp_node
10106 && mask
10107 && !vect_maybe_update_slp_op_vectype (slp_op,
10108 mask_vectype))
10109 {
10110 if (dump_enabled_p ())
10111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10112 "incompatible vector types for invariants\n");
10113 return false;
10114 }
10115
10116 if (!slp)
10117 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10118
10119 if (loop_vinfo
10120 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10121 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10122 VLS_LOAD, group_size,
10123 memory_access_type, &gs_info,
10124 mask);
10125
10126 if (dump_enabled_p ()
10127 && memory_access_type != VMAT_ELEMENTWISE
10128 && memory_access_type != VMAT_GATHER_SCATTER
10129 && alignment_support_scheme != dr_aligned)
10130 dump_printf_loc (MSG_NOTE, vect_location,
10131 "Vectorizing an unaligned access.\n");
10132
10133 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10134 vinfo->any_known_not_updated_vssa = true;
10135
10136 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10137 }
10138
10139 if (!slp)
10140 gcc_assert (memory_access_type
10141 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10142
10143 if (dump_enabled_p () && !costing_p)
10144 dump_printf_loc (MSG_NOTE, vect_location,
10145 "transform load. ncopies = %d\n", ncopies);
10146
10147 /* Transform. */
10148
10149 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10150 ensure_base_align (dr_info);
10151
10152 if (memory_access_type == VMAT_INVARIANT)
10153 {
10154 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10155 /* If we have versioned for aliasing or the loop doesn't
10156 have any data dependencies that would preclude this,
10157 then we are sure this is a loop invariant load and
10158 thus we can insert it on the preheader edge.
10159 TODO: hoist_defs_of_uses should ideally be computed
10160 once at analysis time, remembered and used at
10161 transform time. */
10162 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10163 && !nested_in_vect_loop
10164 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10165 if (costing_p)
10166 {
10167 enum vect_cost_model_location cost_loc
10168 = hoist_p ? vect_prologue : vect_body;
10169 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10170 stmt_info, 0, cost_loc);
10171 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10172 cost_loc);
10173 unsigned int prologue_cost = hoist_p ? cost : 0;
10174 unsigned int inside_cost = hoist_p ? 0 : cost;
10175 if (dump_enabled_p ())
10176 dump_printf_loc (MSG_NOTE, vect_location,
10177 "vect_model_load_cost: inside_cost = %d, "
10178 "prologue_cost = %d .\n",
10179 inside_cost, prologue_cost);
10180 return true;
10181 }
10182 if (hoist_p)
10183 {
10184 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10185 if (dump_enabled_p ())
10186 dump_printf_loc (MSG_NOTE, vect_location,
10187 "hoisting out of the vectorized loop: %G",
10188 (gimple *) stmt);
10189 scalar_dest = copy_ssa_name (scalar_dest);
10190 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10191 edge pe = loop_preheader_edge (loop);
10192 gphi *vphi = get_virtual_phi (loop->header);
10193 tree vuse;
10194 if (vphi)
10195 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10196 else
10197 vuse = gimple_vuse (gsi_stmt (*gsi));
10198 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10199 gimple_set_vuse (new_stmt, vuse);
10200 gsi_insert_on_edge_immediate (pe, new_stmt);
10201 }
10202 /* These copies are all equivalent. */
10203 if (hoist_p)
10204 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10205 vectype, NULL);
10206 else
10207 {
10208 gimple_stmt_iterator gsi2 = *gsi;
10209 gsi_next (&gsi2);
10210 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10211 vectype, &gsi2);
10212 }
10213 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10214 if (slp)
10215 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10216 slp_node->push_vec_def (new_stmt);
10217 else
10218 {
10219 for (j = 0; j < ncopies; ++j)
10220 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10221 *vec_stmt = new_stmt;
10222 }
10223 return true;
10224 }
10225
10226 if (memory_access_type == VMAT_ELEMENTWISE
10227 || memory_access_type == VMAT_STRIDED_SLP)
10228 {
10229 gimple_stmt_iterator incr_gsi;
10230 bool insert_after;
10231 tree offvar;
10232 tree ivstep;
10233 tree running_off;
10234 vec<constructor_elt, va_gc> *v = NULL;
10235 tree stride_base, stride_step, alias_off;
10236 /* Checked by get_load_store_type. */
10237 unsigned int const_nunits = nunits.to_constant ();
10238 unsigned HOST_WIDE_INT cst_offset = 0;
10239 tree dr_offset;
10240 unsigned int inside_cost = 0;
10241
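 /* Elementwise and strided-SLP loads are handled here without partial
 vectors and never inside the inner loop of an outer-loop
 vectorization, as the asserts below check. */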
10242 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10243 gcc_assert (!nested_in_vect_loop);
10244
10245 if (grouped_load)
10246 {
10247 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10248 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10249 }
10250 else
10251 {
10252 first_stmt_info = stmt_info;
10253 first_dr_info = dr_info;
10254 }
10255
10256 if (slp && grouped_load)
10257 {
10258 group_size = DR_GROUP_SIZE (first_stmt_info);
10259 ref_type = get_group_alias_ptr_type (first_stmt_info);
10260 }
10261 else
10262 {
10263 if (grouped_load)
10264 cst_offset
10265 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10266 * vect_get_place_in_interleaving_chain (stmt_info,
10267 first_stmt_info));
10268 group_size = 1;
10269 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10270 }
10271
10272 if (!costing_p)
10273 {
10274 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10275 stride_base = fold_build_pointer_plus (
10276 DR_BASE_ADDRESS (first_dr_info->dr),
10277 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10278 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10279 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10280
10281 /* For a load with a loop-invariant stride other than a power of 2
10282 (i.e. not a grouped access) like so:
10283
10284 for (i = 0; i < n; i += stride)
10285 ... = array[i];
10286
10287 we generate a new induction variable and new accesses to
10288 form a new vector (or vectors, depending on ncopies):
10289
10290 for (j = 0; ; j += VF*stride)
10291 tmp1 = array[j];
10292 tmp2 = array[j + stride];
10293 ...
10294 vectemp = {tmp1, tmp2, ...}
10295 */
10296
10297 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10298 build_int_cst (TREE_TYPE (stride_step), vf));
10299
10300 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10301
10302 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10303 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10304 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10305 loop, &incr_gsi, insert_after,
10306 &offvar, NULL);
10307
10308 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10309 }
10310
10311 running_off = offvar;
10312 alias_off = build_int_cst (ref_type, 0);
10313 int nloads = const_nunits;
10314 int lnel = 1;
10315 tree ltype = TREE_TYPE (vectype);
10316 tree lvectype = vectype;
10317 auto_vec<tree> dr_chain;
10318 if (memory_access_type == VMAT_STRIDED_SLP)
10319 {
10320 if (group_size < const_nunits)
10321 {
10322 /* First check if vec_init optab supports construction from vector
10323 elts directly. Otherwise avoid emitting a constructor of
10324 vector elements by performing the loads using an integer type
10325 of the same size, constructing a vector of those and then
10326 re-interpreting it as the original vector type. This avoids a
10327 huge runtime penalty due to the general inability to perform
10328 store forwarding from smaller stores to a larger load. */
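 /* A sketch of the effect (actual types depend on the target): with a
 V4SI vectype and group_size == 2 this yields nloads == 2 and
 lnel == 2, each two-element group being loaded either as a V2SI
 subvector or as a DImode integer; the pieces are composed into
 lvectype and view-converted back to the original vectype below. */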
10329 tree ptype;
10330 tree vtype
10331 = vector_vector_composition_type (vectype,
10332 const_nunits / group_size,
10333 &ptype);
10334 if (vtype != NULL_TREE)
10335 {
10336 nloads = const_nunits / group_size;
10337 lnel = group_size;
10338 lvectype = vtype;
10339 ltype = ptype;
10340 }
10341 }
10342 else
10343 {
10344 nloads = 1;
10345 lnel = const_nunits;
10346 ltype = vectype;
10347 }
10348 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10349 }
10350 /* If the element-wise vectype has a single lane, load the whole vector(1) type directly. */
10351 else if (nloads == 1)
10352 ltype = vectype;
10353
10354 if (slp)
10355 {
10356 /* For SLP permutation support we need to load the whole group,
10357 not only the number of vector stmts the permutation result
10358 fits in. */
10359 if (slp_perm)
10360 {
10361 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10362 variable VF. */
10363 unsigned int const_vf = vf.to_constant ();
10364 ncopies = CEIL (group_size * const_vf, const_nunits);
10365 dr_chain.create (ncopies);
10366 }
10367 else
10368 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10369 }
10370 unsigned int group_el = 0;
10371 unsigned HOST_WIDE_INT
10372 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10373 unsigned int n_groups = 0;
10374 /* When costing some adjacent vector loads, we'd like to cost them
10375 once with their total number instead of costing each one by one. */
10376 unsigned int n_adjacent_loads = 0;
10377 for (j = 0; j < ncopies; j++)
10378 {
10379 if (nloads > 1 && !costing_p)
10380 vec_alloc (v, nloads);
10381 gimple *new_stmt = NULL;
10382 for (i = 0; i < nloads; i++)
10383 {
10384 if (costing_p)
10385 {
10386 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10387 avoid ICE, see PR110776. */
10388 if (VECTOR_TYPE_P (ltype)
10389 && memory_access_type != VMAT_ELEMENTWISE)
10390 n_adjacent_loads++;
10391 else
10392 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10393 stmt_info, 0, vect_body);
10394 continue;
10395 }
10396 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10397 group_el * elsz + cst_offset);
10398 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10399 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10400 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10401 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10402 if (nloads > 1)
10403 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10404 gimple_assign_lhs (new_stmt));
10405
10406 group_el += lnel;
10407 if (! slp
10408 || group_el == group_size)
10409 {
10410 n_groups++;
10411 /* When doing SLP make sure not to load elements from
10412 the next vector iteration; those will not be accessed,
10413 so just use the last element again. See PR107451. */
10414 if (!slp || known_lt (n_groups, vf))
10415 {
10416 tree newoff = copy_ssa_name (running_off);
10417 gimple *incr
10418 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10419 running_off, stride_step);
10420 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10421 running_off = newoff;
10422 }
10423 group_el = 0;
10424 }
10425 }
10426
10427 if (nloads > 1)
10428 {
10429 if (costing_p)
10430 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10431 stmt_info, 0, vect_body);
10432 else
10433 {
10434 tree vec_inv = build_constructor (lvectype, v);
10435 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10436 lvectype, gsi);
10437 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10438 if (lvectype != vectype)
10439 {
10440 new_stmt
10441 = gimple_build_assign (make_ssa_name (vectype),
10442 VIEW_CONVERT_EXPR,
10443 build1 (VIEW_CONVERT_EXPR,
10444 vectype, new_temp));
10445 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10446 gsi);
10447 }
10448 }
10449 }
10450
10451 if (!costing_p)
10452 {
10453 if (slp)
10454 {
10455 if (slp_perm)
10456 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10457 else
10458 slp_node->push_vec_def (new_stmt);
10459 }
10460 else
10461 {
10462 if (j == 0)
10463 *vec_stmt = new_stmt;
10464 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10465 }
10466 }
10467 }
10468 if (slp_perm)
10469 {
10470 unsigned n_perms;
10471 if (costing_p)
10472 {
10473 unsigned n_loads;
10474 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10475 true, &n_perms, &n_loads);
10476 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10477 first_stmt_info, 0, vect_body);
10478 }
10479 else
10480 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10481 false, &n_perms);
10482 }
10483
10484 if (costing_p)
10485 {
10486 if (n_adjacent_loads > 0)
10487 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10488 alignment_support_scheme, misalignment, false,
10489 &inside_cost, nullptr, cost_vec, cost_vec,
10490 true);
10491 if (dump_enabled_p ())
10492 dump_printf_loc (MSG_NOTE, vect_location,
10493 "vect_model_load_cost: inside_cost = %u, "
10494 "prologue_cost = 0 .\n",
10495 inside_cost);
10496 }
10497
10498 return true;
10499 }
10500
10501 if (memory_access_type == VMAT_GATHER_SCATTER
10502 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10503 grouped_load = false;
10504
10505 if (grouped_load
10506 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10507 {
10508 if (grouped_load)
10509 {
10510 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10511 group_size = DR_GROUP_SIZE (first_stmt_info);
10512 }
10513 else
10514 {
10515 first_stmt_info = stmt_info;
10516 group_size = 1;
10517 }
10518 /* For SLP vectorization we directly vectorize a subchain
10519 without permutation. */
10520 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10521 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10522 /* For BB vectorization always use the first stmt to base
10523 the data ref pointer on. */
10524 if (bb_vinfo)
10525 first_stmt_info_for_drptr
10526 = vect_find_first_scalar_stmt_in_slp (slp_node);
10527
10528 /* Check if the chain of loads is already vectorized. */
10529 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10530 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10531 ??? But we can only do so if there is exactly one
10532 as we have no way to get at the rest. Leave the CSE
10533 opportunity alone.
10534 ??? With the group load eventually participating
10535 in multiple different permutations (having multiple
10536 slp nodes which refer to the same group) the CSE
10537 is even wrong code. See PR56270. */
10538 && !slp)
10539 {
10540 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10541 return true;
10542 }
10543 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10544 group_gap_adj = 0;
10545
10546 /* VEC_NUM is the number of vect stmts to be created for this group. */
10547 if (slp)
10548 {
10549 grouped_load = false;
10550 /* If an SLP permutation is from N elements to N elements,
10551 and if one vector holds a whole number of N, we can load
10552 the inputs to the permutation in the same way as an
10553 unpermuted sequence. In other cases we need to load the
10554 whole group, not only the number of vector stmts the
10555 permutation result fits in. */
10556 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10557 if (slp_perm
10558 && (group_size != scalar_lanes
10559 || !multiple_p (nunits, group_size)))
10560 {
10561 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10562 variable VF; see vect_transform_slp_perm_load. */
10563 unsigned int const_vf = vf.to_constant ();
10564 unsigned int const_nunits = nunits.to_constant ();
10565 vec_num = CEIL (group_size * const_vf, const_nunits);
10566 group_gap_adj = vf * group_size - nunits * vec_num;
10567 }
10568 else
10569 {
10570 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10571 group_gap_adj
10572 = group_size - scalar_lanes;
10573 }
10574 }
10575 else
10576 vec_num = group_size;
10577
10578 ref_type = get_group_alias_ptr_type (first_stmt_info);
10579 }
10580 else
10581 {
10582 first_stmt_info = stmt_info;
10583 first_dr_info = dr_info;
10584 group_size = vec_num = 1;
10585 group_gap_adj = 0;
10586 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10587 if (slp)
10588 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10589 }
10590
10591 gcc_assert (alignment_support_scheme);
10592 vec_loop_masks *loop_masks
10593 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10594 ? &LOOP_VINFO_MASKS (loop_vinfo)
10595 : NULL);
10596 vec_loop_lens *loop_lens
10597 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10598 ? &LOOP_VINFO_LENS (loop_vinfo)
10599 : NULL);
10600
10601 /* Both vect_analyze_stmt and vect_transform_stmt reach this point,
10602 but with one difference: we cannot enable both lens and masks
10603 during transform, although that is allowed during analysis.
10604 We shouldn't use the length-based approach if fully masked. */
10605 if (cost_vec == NULL)
10606 /* The cost_vec is NULL during transform. */
10607 gcc_assert ((!loop_lens || !loop_masks));
10608
10609 /* Targets with load-lane instructions must not require explicit
10610 realignment. vect_supportable_dr_alignment always returns either
10611 dr_aligned or dr_unaligned_supported for masked operations. */
10612 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10613 && !mask
10614 && !loop_masks)
10615 || alignment_support_scheme == dr_aligned
10616 || alignment_support_scheme == dr_unaligned_supported);
10617
10618 /* In case the vectorization factor (VF) is bigger than the number
10619 of elements that we can fit in a vectype (nunits), we have to generate
10620 more than one vector stmt - i.e - we need to "unroll" the
10621 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10622 from one copy of the vector stmt to the next, in the field
10623 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10624 stages to find the correct vector defs to be used when vectorizing
10625 stmts that use the defs of the current stmt. The example below
10626 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10627 need to create 4 vectorized stmts):
10628
10629 before vectorization:
10630 RELATED_STMT VEC_STMT
10631 S1: x = memref - -
10632 S2: z = x + 1 - -
10633
10634 step 1: vectorize stmt S1:
10635 We first create the vector stmt VS1_0, and, as usual, record a
10636 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10637 Next, we create the vector stmt VS1_1, and record a pointer to
10638 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10639 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10640 stmts and pointers:
10641 RELATED_STMT VEC_STMT
10642 VS1_0: vx0 = memref0 VS1_1 -
10643 VS1_1: vx1 = memref1 VS1_2 -
10644 VS1_2: vx2 = memref2 VS1_3 -
10645 VS1_3: vx3 = memref3 - -
10646 S1: x = load - VS1_0
10647 S2: z = x + 1 - -
10648 */
10649
10650 /* In case of interleaving (non-unit grouped access):
10651
10652 S1: x2 = &base + 2
10653 S2: x0 = &base
10654 S3: x1 = &base + 1
10655 S4: x3 = &base + 3
10656
10657 Vectorized loads are created in the order of memory accesses
10658 starting from the access of the first stmt of the chain:
10659
10660 VS1: vx0 = &base
10661 VS2: vx1 = &base + vec_size*1
10662 VS3: vx3 = &base + vec_size*2
10663 VS4: vx4 = &base + vec_size*3
10664
10665 Then permutation statements are generated:
10666
10667 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10668 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10669 ...
10670
10671 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10672 (the order of the data-refs in the output of vect_permute_load_chain
10673 corresponds to the order of scalar stmts in the interleaving chain - see
10674 the documentation of vect_permute_load_chain()).
10675 The generation of permutation stmts and recording them in
10676 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10677
10678 In case of both multiple types and interleaving, the vector loads and
10679 permutation stmts above are created for every copy. The result vector
10680 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10681 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10682
10683 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10684 on a target that supports unaligned accesses (dr_unaligned_supported)
10685 we generate the following code:
10686 p = initial_addr;
10687 indx = 0;
10688 loop {
10689 p = p + indx * vectype_size;
10690 vec_dest = *(p);
10691 indx = indx + 1;
10692 }
10693
10694 Otherwise, the data reference is potentially unaligned on a target that
10695 does not support unaligned accesses (dr_explicit_realign_optimized) -
10696 then generate the following code, in which the data in each iteration is
10697 obtained by two vector loads, one from the previous iteration, and one
10698 from the current iteration:
10699 p1 = initial_addr;
10700 msq_init = *(floor(p1))
10701 p2 = initial_addr + VS - 1;
10702 realignment_token = call target_builtin;
10703 indx = 0;
10704 loop {
10705 p2 = p2 + indx * vectype_size
10706 lsq = *(floor(p2))
10707 vec_dest = realign_load (msq, lsq, realignment_token)
10708 indx = indx + 1;
10709 msq = lsq;
10710 } */
10711
10712 /* If the misalignment remains the same throughout the execution of the
10713 loop, we can create the init_addr and permutation mask at the loop
10714 preheader. Otherwise, it needs to be created inside the loop.
10715 This can only occur when vectorizing memory accesses in the inner-loop
10716 nested within an outer-loop that is being vectorized. */
10717
10718 if (nested_in_vect_loop
10719 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10720 GET_MODE_SIZE (TYPE_MODE (vectype))))
10721 {
10722 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10723 compute_in_loop = true;
10724 }
10725
10726 bool diff_first_stmt_info
10727 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10728
10729 tree offset = NULL_TREE;
10730 if ((alignment_support_scheme == dr_explicit_realign_optimized
10731 || alignment_support_scheme == dr_explicit_realign)
10732 && !compute_in_loop)
10733 {
10734 /* If we have a different first_stmt_info, we can't set up realignment
10735 here, since we can't guarantee that first_stmt_info's DR has been
10736 initialized yet; use first_stmt_info_for_drptr's DR instead, bumping
10737 it by the distance from first_stmt_info's DR as below. */
10738 if (!costing_p)
10739 {
10740 if (!diff_first_stmt_info)
10741 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10742 &realignment_token,
10743 alignment_support_scheme, NULL_TREE,
10744 &at_loop);
10745 if (alignment_support_scheme == dr_explicit_realign_optimized)
10746 {
10747 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10748 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10749 size_one_node);
10750 gcc_assert (!first_stmt_info_for_drptr);
10751 }
10752 }
10753 }
10754 else
10755 at_loop = loop;
10756
10757 if (!known_eq (poffset, 0))
10758 offset = (offset
10759 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10760 : size_int (poffset));
10761
10762 tree bump;
10763 tree vec_offset = NULL_TREE;
10764 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10765 {
10766 aggr_type = NULL_TREE;
10767 bump = NULL_TREE;
10768 }
10769 else if (memory_access_type == VMAT_GATHER_SCATTER)
10770 {
10771 aggr_type = elem_type;
10772 if (!costing_p)
10773 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10774 &bump, &vec_offset, loop_lens);
10775 }
10776 else
10777 {
10778 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10779 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10780 else
10781 aggr_type = vectype;
10782 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10783 memory_access_type, loop_lens);
10784 }
10785
10786 auto_vec<tree> vec_offsets;
10787 auto_vec<tree> vec_masks;
10788 if (mask && !costing_p)
10789 {
10790 if (slp_node)
10791 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10792 &vec_masks);
10793 else
10794 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10795 &vec_masks, mask_vectype);
10796 }
10797
10798 tree vec_mask = NULL_TREE;
10799 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10800 {
10801 gcc_assert (alignment_support_scheme == dr_aligned
10802 || alignment_support_scheme == dr_unaligned_supported);
10803 gcc_assert (grouped_load && !slp);
10804
10805 unsigned int inside_cost = 0, prologue_cost = 0;
10806 /* When costing some adjacent vector loads, we'd like to cost them
10807 once with their total number instead of costing each one by one. */
10808 unsigned int n_adjacent_loads = 0;
10809 for (j = 0; j < ncopies; j++)
10810 {
10811 if (costing_p)
10812 {
10813 /* An IFN_LOAD_LANES will load all its vector results,
10814 regardless of which ones we actually need. Account
10815 for the cost of unused results. */
10816 if (first_stmt_info == stmt_info)
10817 {
10818 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10819 stmt_vec_info next_stmt_info = first_stmt_info;
10820 do
10821 {
10822 gaps -= 1;
10823 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10824 }
10825 while (next_stmt_info);
10826 if (gaps)
10827 {
10828 if (dump_enabled_p ())
10829 dump_printf_loc (MSG_NOTE, vect_location,
10830 "vect_model_load_cost: %d "
10831 "unused vectors.\n",
10832 gaps);
10833 vect_get_load_cost (vinfo, stmt_info, gaps,
10834 alignment_support_scheme,
10835 misalignment, false, &inside_cost,
10836 &prologue_cost, cost_vec, cost_vec,
10837 true);
10838 }
10839 }
10840 n_adjacent_loads++;
10841 continue;
10842 }
10843
10844 /* 1. Create the vector or array pointer update chain. */
10845 if (j == 0)
10846 dataref_ptr
10847 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10848 at_loop, offset, &dummy, gsi,
10849 &ptr_incr, false, bump);
10850 else
10851 {
10852 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10853 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10854 stmt_info, bump);
10855 }
10856 if (mask)
10857 vec_mask = vec_masks[j];
10858
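 /* VEC_ARRAY receives the vec_num vectors produced by the load-lanes
 call; they are extracted into SSA names below and the array is
 clobbered afterwards. */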
10859 tree vec_array = create_vector_array (vectype, vec_num);
10860
10861 tree final_mask = NULL_TREE;
10862 tree final_len = NULL_TREE;
10863 tree bias = NULL_TREE;
10864 if (loop_masks)
10865 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10866 ncopies, vectype, j);
10867 if (vec_mask)
10868 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10869 vec_mask, gsi);
10870
10871 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10872 {
10873 if (loop_lens)
10874 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10875 ncopies, vectype, j, 1);
10876 else
10877 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10878 signed char biasval
10879 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10880 bias = build_int_cst (intQI_type_node, biasval);
10881 if (!final_mask)
10882 {
10883 mask_vectype = truth_type_for (vectype);
10884 final_mask = build_minus_one_cst (mask_vectype);
10885 }
10886 }
10887
10888 gcall *call;
10889 if (final_len && final_mask)
10890 {
10891 /* Emit:
10892 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10893 VEC_MASK, LEN, BIAS). */
10894 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10895 tree alias_ptr = build_int_cst (ref_type, align);
10896 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10897 dataref_ptr, alias_ptr,
10898 final_mask, final_len, bias);
10899 }
10900 else if (final_mask)
10901 {
10902 /* Emit:
10903 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10904 VEC_MASK). */
10905 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10906 tree alias_ptr = build_int_cst (ref_type, align);
10907 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10908 dataref_ptr, alias_ptr,
10909 final_mask);
10910 }
10911 else
10912 {
10913 /* Emit:
10914 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10915 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10916 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10917 }
10918 gimple_call_set_lhs (call, vec_array);
10919 gimple_call_set_nothrow (call, true);
10920 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10921
10922 dr_chain.create (vec_num);
10923 /* Extract each vector into an SSA_NAME. */
10924 for (i = 0; i < vec_num; i++)
10925 {
10926 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10927 vec_array, i);
10928 dr_chain.quick_push (new_temp);
10929 }
10930
10931 /* Record the mapping between SSA_NAMEs and statements. */
10932 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10933
10934 /* Record that VEC_ARRAY is now dead. */
10935 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10936
10937 dr_chain.release ();
10938
10939 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10940 }
10941
10942 if (costing_p)
10943 {
10944 if (n_adjacent_loads > 0)
10945 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10946 alignment_support_scheme, misalignment, false,
10947 &inside_cost, &prologue_cost, cost_vec,
10948 cost_vec, true);
10949 if (dump_enabled_p ())
10950 dump_printf_loc (MSG_NOTE, vect_location,
10951 "vect_model_load_cost: inside_cost = %u, "
10952 "prologue_cost = %u .\n",
10953 inside_cost, prologue_cost);
10954 }
10955
10956 return true;
10957 }
10958
10959 if (memory_access_type == VMAT_GATHER_SCATTER)
10960 {
10961 gcc_assert (alignment_support_scheme == dr_aligned
10962 || alignment_support_scheme == dr_unaligned_supported);
10963 gcc_assert (!grouped_load && !slp_perm);
10964
10965 unsigned int inside_cost = 0, prologue_cost = 0;
10966 for (j = 0; j < ncopies; j++)
10967 {
10968 /* 1. Create the vector or array pointer update chain. */
10969 if (j == 0 && !costing_p)
10970 {
10971 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10972 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10973 slp_node, &gs_info, &dataref_ptr,
10974 &vec_offsets);
10975 else
10976 dataref_ptr
10977 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10978 at_loop, offset, &dummy, gsi,
10979 &ptr_incr, false, bump);
10980 }
10981 else if (!costing_p)
10982 {
10983 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10984 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10985 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10986 gsi, stmt_info, bump);
10987 }
10988
10989 gimple *new_stmt = NULL;
10990 for (i = 0; i < vec_num; i++)
10991 {
10992 tree final_mask = NULL_TREE;
10993 tree final_len = NULL_TREE;
10994 tree bias = NULL_TREE;
10995 if (!costing_p)
10996 {
10997 if (mask)
10998 vec_mask = vec_masks[vec_num * j + i];
10999 if (loop_masks)
11000 final_mask
11001 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11002 vec_num * ncopies, vectype,
11003 vec_num * j + i);
11004 if (vec_mask)
11005 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11006 final_mask, vec_mask, gsi);
11007
11008 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11009 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11010 gsi, stmt_info, bump);
11011 }
11012
11013 /* 2. Create the vector-load in the loop. */
11014 unsigned HOST_WIDE_INT align;
11015 if (gs_info.ifn != IFN_LAST)
11016 {
11017 if (costing_p)
11018 {
11019 unsigned int cnunits = vect_nunits_for_cost (vectype);
11020 inside_cost
11021 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11022 stmt_info, 0, vect_body);
11023 continue;
11024 }
11025 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11026 vec_offset = vec_offsets[vec_num * j + i];
11027 tree zero = build_zero_cst (vectype);
11028 tree scale = size_int (gs_info.scale);
11029
11030 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
11031 {
11032 if (loop_lens)
11033 final_len
11034 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11035 vec_num * ncopies, vectype,
11036 vec_num * j + i, 1);
11037 else
11038 final_len
11039 = build_int_cst (sizetype,
11040 TYPE_VECTOR_SUBPARTS (vectype));
11041 signed char biasval
11042 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11043 bias = build_int_cst (intQI_type_node, biasval);
11044 if (!final_mask)
11045 {
11046 mask_vectype = truth_type_for (vectype);
11047 final_mask = build_minus_one_cst (mask_vectype);
11048 }
11049 }
11050
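 /* Build one of the following calls (a sketch of the forms emitted
 below):
 .MASK_LEN_GATHER_LOAD (PTR, OFFSET, SCALE, ZERO, MASK, LEN, BIAS)
 .MASK_GATHER_LOAD (PTR, OFFSET, SCALE, ZERO, MASK)
 .GATHER_LOAD (PTR, OFFSET, SCALE, ZERO)
 depending on whether a length and/or mask is required. */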
11051 gcall *call;
11052 if (final_len && final_mask)
11053 call
11054 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11055 dataref_ptr, vec_offset,
11056 scale, zero, final_mask,
11057 final_len, bias);
11058 else if (final_mask)
11059 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11060 dataref_ptr, vec_offset,
11061 scale, zero, final_mask);
11062 else
11063 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11064 dataref_ptr, vec_offset,
11065 scale, zero);
11066 gimple_call_set_nothrow (call, true);
11067 new_stmt = call;
11068 data_ref = NULL_TREE;
11069 }
11070 else if (gs_info.decl)
11071 {
11072 /* The builtin decls path for gather is legacy, x86 only. */
11073 gcc_assert (!final_len && nunits.is_constant ());
11074 if (costing_p)
11075 {
11076 unsigned int cnunits = vect_nunits_for_cost (vectype);
11077 inside_cost
11078 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11079 stmt_info, 0, vect_body);
11080 continue;
11081 }
11082 poly_uint64 offset_nunits
11083 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11084 if (known_eq (nunits, offset_nunits))
11085 {
11086 new_stmt = vect_build_one_gather_load_call
11087 (vinfo, stmt_info, gsi, &gs_info,
11088 dataref_ptr, vec_offsets[vec_num * j + i],
11089 final_mask);
11090 data_ref = NULL_TREE;
11091 }
11092 else if (known_eq (nunits, offset_nunits * 2))
11093 {
11094 /* We have an offset vector with half the number of
11095 lanes but the builtins will produce full vectype
11096 data with just the lower lanes filled. */
11097 new_stmt = vect_build_one_gather_load_call
11098 (vinfo, stmt_info, gsi, &gs_info,
11099 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11100 final_mask);
11101 tree low = make_ssa_name (vectype);
11102 gimple_set_lhs (new_stmt, low);
11103 vect_finish_stmt_generation (vinfo, stmt_info,
11104 new_stmt, gsi);
11105
11106 /* Now put the upper half of final_mask into its lower half. */
11107 if (final_mask
11108 && !SCALAR_INT_MODE_P
11109 (TYPE_MODE (TREE_TYPE (final_mask))))
11110 {
11111 int count = nunits.to_constant ();
11112 vec_perm_builder sel (count, count, 1);
11113 sel.quick_grow (count);
11114 for (int i = 0; i < count; ++i)
11115 sel[i] = i | (count / 2);
11116 vec_perm_indices indices (sel, 2, count);
11117 tree perm_mask = vect_gen_perm_mask_checked
11118 (TREE_TYPE (final_mask), indices);
11119 new_stmt = gimple_build_assign (NULL_TREE,
11120 VEC_PERM_EXPR,
11121 final_mask,
11122 final_mask,
11123 perm_mask);
11124 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11125 gimple_set_lhs (new_stmt, final_mask);
11126 vect_finish_stmt_generation (vinfo, stmt_info,
11127 new_stmt, gsi);
11128 }
11129 else if (final_mask)
11130 {
11131 new_stmt = gimple_build_assign (NULL_TREE,
11132 VEC_UNPACK_HI_EXPR,
11133 final_mask);
11134 final_mask = make_ssa_name
11135 (truth_type_for (gs_info.offset_vectype));
11136 gimple_set_lhs (new_stmt, final_mask);
11137 vect_finish_stmt_generation (vinfo, stmt_info,
11138 new_stmt, gsi);
11139 }
11140
11141 new_stmt = vect_build_one_gather_load_call
11142 (vinfo, stmt_info, gsi, &gs_info,
11143 dataref_ptr,
11144 vec_offsets[2 * vec_num * j + 2 * i + 1],
11145 final_mask);
11146 tree high = make_ssa_name (vectype);
11147 gimple_set_lhs (new_stmt, high);
11148 vect_finish_stmt_generation (vinfo, stmt_info,
11149 new_stmt, gsi);
11150
11151 /* Compose low and high into the full result vector. */
11152 int count = nunits.to_constant ();
11153 vec_perm_builder sel (count, count, 1);
11154 sel.quick_grow (count);
11155 for (int i = 0; i < count; ++i)
11156 sel[i] = i < count / 2 ? i : i + count / 2;
11157 vec_perm_indices indices (sel, 2, count);
11158 tree perm_mask
11159 = vect_gen_perm_mask_checked (vectype, indices);
11160 new_stmt = gimple_build_assign (NULL_TREE,
11161 VEC_PERM_EXPR,
11162 low, high, perm_mask);
11163 data_ref = NULL_TREE;
11164 }
11165 else if (known_eq (nunits * 2, offset_nunits))
11166 {
11167 /* We have an offset vector with double the number of
11168 lanes. Select the low/high part accordingly. */
11169 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11170 if ((vec_num * j + i) & 1)
11171 {
11172 int count = offset_nunits.to_constant ();
11173 vec_perm_builder sel (count, count, 1);
11174 sel.quick_grow (count);
11175 for (int i = 0; i < count; ++i)
11176 sel[i] = i | (count / 2);
11177 vec_perm_indices indices (sel, 2, count);
11178 tree perm_mask = vect_gen_perm_mask_checked
11179 (TREE_TYPE (vec_offset), indices);
11180 new_stmt = gimple_build_assign (NULL_TREE,
11181 VEC_PERM_EXPR,
11182 vec_offset,
11183 vec_offset,
11184 perm_mask);
11185 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11186 gimple_set_lhs (new_stmt, vec_offset);
11187 vect_finish_stmt_generation (vinfo, stmt_info,
11188 new_stmt, gsi);
11189 }
11190 new_stmt = vect_build_one_gather_load_call
11191 (vinfo, stmt_info, gsi, &gs_info,
11192 dataref_ptr, vec_offset, final_mask);
11193 data_ref = NULL_TREE;
11194 }
11195 else
11196 gcc_unreachable ();
11197 }
11198 else
11199 {
11200 /* Emulated gather-scatter. */
11201 gcc_assert (!final_mask);
11202 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11203 if (costing_p)
11204 {
11205 /* For emulated gathers, N offset vector element extracts (the
11206 scalar scaling and pointer add are consumed by the load). */
11207 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11208 vec_to_scalar, stmt_info,
11209 0, vect_body);
11210 /* N scalar loads plus gathering them into a
11211 vector. */
11212 inside_cost
11213 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11214 stmt_info, 0, vect_body);
11215 inside_cost
11216 = record_stmt_cost (cost_vec, 1, vec_construct,
11217 stmt_info, 0, vect_body);
11218 continue;
11219 }
11220 unsigned HOST_WIDE_INT const_offset_nunits
11221 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11222 .to_constant ();
11223 vec<constructor_elt, va_gc> *ctor_elts;
11224 vec_alloc (ctor_elts, const_nunits);
11225 gimple_seq stmts = NULL;
11226 /* We support offset vectors with more elements
11227 than the data vector for now. */
11228 unsigned HOST_WIDE_INT factor
11229 = const_offset_nunits / const_nunits;
11230 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11231 unsigned elt_offset
11232 = ((vec_num * j + i) % factor) * const_nunits;
11233 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11234 tree scale = size_int (gs_info.scale);
11235 align = get_object_alignment (DR_REF (first_dr_info->dr));
11236 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
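 /* For each lane extract the offset element, scale it, add it to the
 base pointer and load the scalar element; the loaded elements are
 then collected into a CONSTRUCTOR forming the vector result. */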
11237 for (unsigned k = 0; k < const_nunits; ++k)
11238 {
11239 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11240 bitsize_int (k + elt_offset));
11241 tree idx
11242 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11243 vec_offset, TYPE_SIZE (idx_type), boff);
11244 idx = gimple_convert (&stmts, sizetype, idx);
11245 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11246 scale);
11247 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11248 TREE_TYPE (dataref_ptr),
11249 dataref_ptr, idx);
11250 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11251 tree elt = make_ssa_name (TREE_TYPE (vectype));
11252 tree ref = build2 (MEM_REF, ltype, ptr,
11253 build_int_cst (ref_type, 0));
11254 new_stmt = gimple_build_assign (elt, ref);
11255 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11256 gimple_seq_add_stmt (&stmts, new_stmt);
11257 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11258 }
11259 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11260 new_stmt = gimple_build_assign (
11261 NULL_TREE, build_constructor (vectype, ctor_elts));
11262 data_ref = NULL_TREE;
11263 }
11264
11265 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11266 /* DATA_REF is null if we've already built the statement. */
11267 if (data_ref)
11268 {
11269 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11270 new_stmt = gimple_build_assign (vec_dest, data_ref);
11271 }
11272 new_temp = make_ssa_name (vec_dest, new_stmt);
11273 gimple_set_lhs (new_stmt, new_temp);
11274 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11275
11276 /* Store vector loads in the corresponding SLP_NODE. */
11277 if (slp)
11278 slp_node->push_vec_def (new_stmt);
11279 }
11280
11281 if (!slp && !costing_p)
11282 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11283 }
11284
11285 if (!slp && !costing_p)
11286 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11287
11288 if (costing_p && dump_enabled_p ())
11289 dump_printf_loc (MSG_NOTE, vect_location,
11290 "vect_model_load_cost: inside_cost = %u, "
11291 "prologue_cost = %u .\n",
11292 inside_cost, prologue_cost);
11293 return true;
11294 }
11295
11296 poly_uint64 group_elt = 0;
11297 unsigned int inside_cost = 0, prologue_cost = 0;
11298 /* When costing some adjacent vector loads, we'd like to cost them
11299 once with their total number instead of costing each one by one. */
11300 unsigned int n_adjacent_loads = 0;
11301 for (j = 0; j < ncopies; j++)
11302 {
11303 /* 1. Create the vector or array pointer update chain. */
11304 if (j == 0 && !costing_p)
11305 {
11306 bool simd_lane_access_p
11307 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11308 if (simd_lane_access_p
11309 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11310 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11311 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11312 && integer_zerop (DR_INIT (first_dr_info->dr))
11313 && alias_sets_conflict_p (get_alias_set (aggr_type),
11314 get_alias_set (TREE_TYPE (ref_type)))
11315 && (alignment_support_scheme == dr_aligned
11316 || alignment_support_scheme == dr_unaligned_supported))
11317 {
11318 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11319 dataref_offset = build_int_cst (ref_type, 0);
11320 }
11321 else if (diff_first_stmt_info)
11322 {
11323 dataref_ptr
11324 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11325 aggr_type, at_loop, offset, &dummy,
11326 gsi, &ptr_incr, simd_lane_access_p,
11327 bump);
11328 /* Adjust the pointer by the difference to first_stmt. */
11329 data_reference_p ptrdr
11330 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11331 tree diff
11332 = fold_convert (sizetype,
11333 size_binop (MINUS_EXPR,
11334 DR_INIT (first_dr_info->dr),
11335 DR_INIT (ptrdr)));
11336 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11337 stmt_info, diff);
11338 if (alignment_support_scheme == dr_explicit_realign)
11339 {
11340 msq = vect_setup_realignment (vinfo,
11341 first_stmt_info_for_drptr, gsi,
11342 &realignment_token,
11343 alignment_support_scheme,
11344 dataref_ptr, &at_loop);
11345 gcc_assert (!compute_in_loop);
11346 }
11347 }
11348 else
11349 dataref_ptr
11350 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11351 at_loop,
11352 offset, &dummy, gsi, &ptr_incr,
11353 simd_lane_access_p, bump);
11354 }
11355 else if (!costing_p)
11356 {
11357 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11358 if (dataref_offset)
11359 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11360 bump);
11361 else
11362 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11363 stmt_info, bump);
11364 }
11365
11366 if (grouped_load || slp_perm)
11367 dr_chain.create (vec_num);
11368
11369 gimple *new_stmt = NULL;
11370 for (i = 0; i < vec_num; i++)
11371 {
11372 tree final_mask = NULL_TREE;
11373 tree final_len = NULL_TREE;
11374 tree bias = NULL_TREE;
11375 if (!costing_p)
11376 {
11377 if (mask)
11378 vec_mask = vec_masks[vec_num * j + i];
11379 if (loop_masks)
11380 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11381 vec_num * ncopies, vectype,
11382 vec_num * j + i);
11383 if (vec_mask)
11384 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11385 final_mask, vec_mask, gsi);
11386
11387 if (i > 0)
11388 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11389 gsi, stmt_info, bump);
11390 }
11391
11392 /* 2. Create the vector-load in the loop. */
11393 switch (alignment_support_scheme)
11394 {
11395 case dr_aligned:
11396 case dr_unaligned_supported:
11397 {
11398 if (costing_p)
11399 break;
11400
11401 unsigned int misalign;
11402 unsigned HOST_WIDE_INT align;
11403 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11404 if (alignment_support_scheme == dr_aligned)
11405 misalign = 0;
11406 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11407 {
11408 align
11409 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11410 misalign = 0;
11411 }
11412 else
11413 misalign = misalignment;
11414 if (dataref_offset == NULL_TREE
11415 && TREE_CODE (dataref_ptr) == SSA_NAME)
11416 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11417 misalign);
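 /* The alignment we can rely on for the access is the least
 significant set bit of the combined base alignment and
 misalignment. */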
11418 align = least_bit_hwi (misalign | align);
11419
11420 /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
11421 machine_mode vmode = TYPE_MODE (vectype);
11422 machine_mode new_vmode = vmode;
11423 internal_fn partial_ifn = IFN_LAST;
11424 if (loop_lens)
11425 {
11426 opt_machine_mode new_ovmode
11427 = get_len_load_store_mode (vmode, true, &partial_ifn);
11428 new_vmode = new_ovmode.require ();
11429 unsigned factor
11430 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11431 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11432 vec_num * ncopies, vectype,
11433 vec_num * j + i, factor);
11434 }
11435 else if (final_mask)
11436 {
11437 if (!can_vec_mask_load_store_p (
11438 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11439 &partial_ifn))
11440 gcc_unreachable ();
11441 }
11442
11443 if (partial_ifn == IFN_MASK_LEN_LOAD)
11444 {
11445 if (!final_len)
11446 {
11447 /* Pass VF value to 'len' argument of
11448 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11449 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11450 }
11451 if (!final_mask)
11452 {
11453 /* Pass all ones value to 'mask' argument of
11454 MASK_LEN_LOAD if final_mask is invalid. */
11455 mask_vectype = truth_type_for (vectype);
11456 final_mask = build_minus_one_cst (mask_vectype);
11457 }
11458 }
11459 if (final_len)
11460 {
11461 signed char biasval
11462 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11463
11464 bias = build_int_cst (intQI_type_node, biasval);
11465 }
11466
11467 if (final_len)
11468 {
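 /* The second argument of the LEN_LOAD/MASK_LEN_LOAD calls conveys
 the alias pointer type and the alignment of the access in bits. */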
11469 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11470 gcall *call;
11471 if (partial_ifn == IFN_MASK_LEN_LOAD)
11472 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11473 dataref_ptr, ptr,
11474 final_mask, final_len,
11475 bias);
11476 else
11477 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11478 dataref_ptr, ptr,
11479 final_len, bias);
11480 gimple_call_set_nothrow (call, true);
11481 new_stmt = call;
11482 data_ref = NULL_TREE;
11483
11484 /* Need conversion if it's wrapped with VnQI. */
11485 if (vmode != new_vmode)
11486 {
11487 tree new_vtype = build_vector_type_for_mode (
11488 unsigned_intQI_type_node, new_vmode);
11489 tree var
11490 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11491 gimple_set_lhs (call, var);
11492 vect_finish_stmt_generation (vinfo, stmt_info, call,
11493 gsi);
11494 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11495 new_stmt = gimple_build_assign (vec_dest,
11496 VIEW_CONVERT_EXPR, op);
11497 }
11498 }
11499 else if (final_mask)
11500 {
11501 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11502 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11503 dataref_ptr, ptr,
11504 final_mask);
11505 gimple_call_set_nothrow (call, true);
11506 new_stmt = call;
11507 data_ref = NULL_TREE;
11508 }
11509 else
11510 {
11511 tree ltype = vectype;
11512 tree new_vtype = NULL_TREE;
11513 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11514 unsigned int vect_align
11515 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11516 unsigned int scalar_dr_size
11517 = vect_get_scalar_dr_size (first_dr_info);
11518 /* If there's no peeling for gaps but we have a gap
11519 with slp loads then load the lower half of the
11520 vector only. See get_group_load_store_type for
11521 when we apply this optimization. */
11522 if (slp
11523 && loop_vinfo
11524 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11525 && known_eq (nunits, (group_size - gap) * 2)
11526 && known_eq (nunits, group_size)
11527 && gap >= (vect_align / scalar_dr_size))
11528 {
11529 tree half_vtype;
11530 new_vtype
11531 = vector_vector_composition_type (vectype, 2,
11532 &half_vtype);
11533 if (new_vtype != NULL_TREE)
11534 ltype = half_vtype;
11535 }
11536 /* Try to use a single smaller load when we are about
11537 to load excess elements compared to the unrolled
11538 scalar loop.
11539 ??? This should cover the above case as well. */
11540 else if (known_gt ((vec_num * j + i + 1) * nunits,
11541 (group_size * vf - gap)))
11542 {
11543 if (known_ge ((vec_num * j + i + 1) * nunits
11544 - (group_size * vf - gap), nunits))
11545 /* DR will be unused. */
11546 ltype = NULL_TREE;
11547 else if (known_ge (vect_align,
11548 tree_to_poly_uint64
11549 (TYPE_SIZE_UNIT (vectype))))
11550 /* Aligned access to excess elements is OK if
11551 at least one element is accessed in the
11552 scalar loop. */
11553 ;
11554 else
11555 {
11556 auto remain
11557 = ((group_size * vf - gap)
11558 - (vec_num * j + i) * nunits);
11559 /* remain should now be > 0 and < nunits. */
11560 unsigned num;
11561 if (constant_multiple_p (nunits, remain, &num))
11562 {
11563 tree ptype;
11564 new_vtype
11565 = vector_vector_composition_type (vectype,
11566 num,
11567 &ptype);
11568 if (new_vtype)
11569 ltype = ptype;
11570 }
11571 /* Else use multiple loads or a masked load? */
11572 }
11573 }
11574 tree offset
11575 = (dataref_offset ? dataref_offset
11576 : build_int_cst (ref_type, 0));
11577 if (!ltype)
11578 ;
11579 else if (ltype != vectype
11580 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11581 {
11582 poly_uint64 gap_offset
11583 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11584 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11585 tree gapcst = build_int_cstu (ref_type, gap_offset);
11586 offset = size_binop (PLUS_EXPR, offset, gapcst);
11587 }
11588 if (ltype)
11589 {
11590 data_ref
11591 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11592 if (alignment_support_scheme == dr_aligned)
11593 ;
11594 else
11595 TREE_TYPE (data_ref)
11596 = build_aligned_type (TREE_TYPE (data_ref),
11597 align * BITS_PER_UNIT);
11598 }
11599 if (!ltype)
11600 data_ref = build_constructor (vectype, NULL);
11601 else if (ltype != vectype)
11602 {
11603 vect_copy_ref_info (data_ref,
11604 DR_REF (first_dr_info->dr));
11605 tree tem = make_ssa_name (ltype);
11606 new_stmt = gimple_build_assign (tem, data_ref);
11607 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11608 gsi);
11609 data_ref = NULL;
11610 vec<constructor_elt, va_gc> *v;
11611 /* We've computed 'num' above either statically as two
11612 or via constant_multiple_p. */
11613 unsigned num
11614 = (exact_div (tree_to_poly_uint64
11615 (TYPE_SIZE_UNIT (vectype)),
11616 tree_to_poly_uint64
11617 (TYPE_SIZE_UNIT (ltype)))
11618 .to_constant ());
11619 vec_alloc (v, num);
11620 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11621 {
11622 while (--num)
11623 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11624 build_zero_cst (ltype));
11625 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11626 }
11627 else
11628 {
11629 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11630 while (--num)
11631 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11632 build_zero_cst (ltype));
11633 }
11634 gcc_assert (new_vtype != NULL_TREE);
11635 if (new_vtype == vectype)
11636 new_stmt = gimple_build_assign (
11637 vec_dest, build_constructor (vectype, v));
11638 else
11639 {
11640 tree new_vname = make_ssa_name (new_vtype);
11641 new_stmt = gimple_build_assign (
11642 new_vname, build_constructor (new_vtype, v));
11643 vect_finish_stmt_generation (vinfo, stmt_info,
11644 new_stmt, gsi);
11645 new_stmt = gimple_build_assign (
11646 vec_dest,
11647 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11648 }
11649 }
11650 }
11651 break;
11652 }
11653 case dr_explicit_realign:
11654 {
11655 if (costing_p)
11656 break;
11657 tree ptr, bump;
11658
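 /* Load MSQ from the floor-aligned address here and set up DATA_REF
 so that the common code below loads LSQ from the next aligned
 address; the two are combined with REALIGN_LOAD_EXPR afterwards,
 following the realignment scheme sketched earlier in this
 function. */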
11659 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11660
11661 if (compute_in_loop)
11662 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11663 &realignment_token,
11664 dr_explicit_realign,
11665 dataref_ptr, NULL);
11666
11667 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11668 ptr = copy_ssa_name (dataref_ptr);
11669 else
11670 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11671 // For explicit realign the target alignment should be
11672 // known at compile time.
11673 unsigned HOST_WIDE_INT align
11674 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11675 new_stmt = gimple_build_assign (
11676 ptr, BIT_AND_EXPR, dataref_ptr,
11677 build_int_cst (TREE_TYPE (dataref_ptr),
11678 -(HOST_WIDE_INT) align));
11679 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11680 data_ref
11681 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11682 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11683 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11684 new_stmt = gimple_build_assign (vec_dest, data_ref);
11685 new_temp = make_ssa_name (vec_dest, new_stmt);
11686 gimple_assign_set_lhs (new_stmt, new_temp);
11687 gimple_move_vops (new_stmt, stmt_info->stmt);
11688 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11689 msq = new_temp;
11690
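 /* Form the address for the second (LSQ) load: advance by VS elements
 minus one byte and mask down to the target alignment, matching
 p2 = initial_addr + VS - 1 in the scheme above. */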
11691 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11692 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11693 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11694 bump);
11695 new_stmt = gimple_build_assign (
11696 NULL_TREE, BIT_AND_EXPR, ptr,
11697 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11698 if (TREE_CODE (ptr) == SSA_NAME)
11699 ptr = copy_ssa_name (ptr, new_stmt);
11700 else
11701 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11702 gimple_assign_set_lhs (new_stmt, ptr);
11703 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11704 data_ref
11705 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11706 break;
11707 }
11708 case dr_explicit_realign_optimized:
11709 {
11710 if (costing_p)
11711 break;
11712 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11713 new_temp = copy_ssa_name (dataref_ptr);
11714 else
11715 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11716 // We should only be doing this if we know the target
11717 // alignment at compile time.
11718 unsigned HOST_WIDE_INT align
11719 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11720 new_stmt = gimple_build_assign (
11721 new_temp, BIT_AND_EXPR, dataref_ptr,
11722 build_int_cst (TREE_TYPE (dataref_ptr),
11723 -(HOST_WIDE_INT) align));
11724 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11725 data_ref = build2 (MEM_REF, vectype, new_temp,
11726 build_int_cst (ref_type, 0));
11727 break;
11728 }
11729 default:
11730 gcc_unreachable ();
11731 }
11732
11733 /* One common place to cost the above vect load for different
11734 alignment support schemes. */
11735 if (costing_p)
11736 {
11737 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only
11738 need to take care of the first stmt, whose stmt_info is
11739 first_stmt_info; iterating vec_num times on it covers the
11740 cost of the remaining stmts, which is consistent with the
11741 transform. The prologue cost for realign only needs to be
11742 counted once for the whole group. */
11743 bool first_stmt_info_p = first_stmt_info == stmt_info;
11744 bool add_realign_cost = first_stmt_info_p && i == 0;
11745 if (memory_access_type == VMAT_CONTIGUOUS
11746 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11747 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11748 && (!grouped_load || first_stmt_info_p)))
11749 {
11750 /* Leave realign cases alone to keep them simple. */
11751 if (alignment_support_scheme == dr_explicit_realign_optimized
11752 || alignment_support_scheme == dr_explicit_realign)
11753 vect_get_load_cost (vinfo, stmt_info, 1,
11754 alignment_support_scheme, misalignment,
11755 add_realign_cost, &inside_cost,
11756 &prologue_cost, cost_vec, cost_vec,
11757 true);
11758 else
11759 n_adjacent_loads++;
11760 }
11761 }
11762 else
11763 {
11764 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11765 /* DATA_REF is null if we've already built the statement. */
11766 if (data_ref)
11767 {
11768 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11769 new_stmt = gimple_build_assign (vec_dest, data_ref);
11770 }
11771 new_temp = make_ssa_name (vec_dest, new_stmt);
11772 gimple_set_lhs (new_stmt, new_temp);
11773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11774 }
11775
11776 /* 3. Handle explicit realignment if necessary/supported.
11777 Create in loop:
11778 vec_dest = realign_load (msq, lsq, realignment_token) */
11779 if (!costing_p
11780 && (alignment_support_scheme == dr_explicit_realign_optimized
11781 || alignment_support_scheme == dr_explicit_realign))
11782 {
11783 lsq = gimple_assign_lhs (new_stmt);
11784 if (!realignment_token)
11785 realignment_token = dataref_ptr;
11786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11787 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11788 lsq, realignment_token);
11789 new_temp = make_ssa_name (vec_dest, new_stmt);
11790 gimple_assign_set_lhs (new_stmt, new_temp);
11791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11792
11793 if (alignment_support_scheme == dr_explicit_realign_optimized)
11794 {
11795 gcc_assert (phi);
11796 if (i == vec_num - 1 && j == ncopies - 1)
11797 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11798 UNKNOWN_LOCATION);
11799 msq = lsq;
11800 }
11801 }
11802
11803 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11804 {
11805 if (costing_p)
11806 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11807 stmt_info, 0, vect_body);
11808 else
11809 {
11810 tree perm_mask = perm_mask_for_reverse (vectype);
11811 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11812 perm_mask, stmt_info, gsi);
11813 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11814 }
11815 }
11816
11817 /* Collect vector loads and later create their permutation in
11818 vect_transform_grouped_load (). */
11819 if (!costing_p && (grouped_load || slp_perm))
11820 dr_chain.quick_push (new_temp);
11821
11822 /* Store vector loads in the corresponding SLP_NODE. */
11823 if (!costing_p && slp && !slp_perm)
11824 slp_node->push_vec_def (new_stmt);
11825
11826 /* With an SLP permutation we load the gaps as well; without
11827 one we need to skip the gaps after we have fully loaded
11828 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11829 group_elt += nunits;
11830 if (!costing_p
11831 && maybe_ne (group_gap_adj, 0U)
11832 && !slp_perm
11833 && known_eq (group_elt, group_size - group_gap_adj))
11834 {
11835 poly_wide_int bump_val
11836 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11837 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11838 == -1)
11839 bump_val = -bump_val;
11840 tree bump = wide_int_to_tree (sizetype, bump_val);
11841 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11842 stmt_info, bump);
11843 group_elt = 0;
11844 }
11845 }
11846 /* Bump the vector pointer to account for a gap or for excess
11847 elements loaded for a permuted SLP load. */
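/* For instance, with 4-byte elements and group_gap_adj == 2 the
pointer below is bumped by 8 bytes, or by -8 bytes when the
access runs with a negative step. */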
11848 if (!costing_p
11849 && maybe_ne (group_gap_adj, 0U)
11850 && slp_perm)
11851 {
11852 poly_wide_int bump_val
11853 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11854 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11855 bump_val = -bump_val;
11856 tree bump = wide_int_to_tree (sizetype, bump_val);
11857 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11858 stmt_info, bump);
11859 }
11860
11861 if (slp && !slp_perm)
11862 continue;
11863
11864 if (slp_perm)
11865 {
11866 unsigned n_perms;
11867 /* For SLP we know we've seen all possible uses of dr_chain so
11868 direct vect_transform_slp_perm_load to DCE the unused parts.
11869 ??? This is a hack to prevent compile-time issues as seen
11870 in PR101120 and friends. */
11871 if (costing_p)
11872 {
11873 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11874 true, &n_perms, nullptr);
11875 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11876 stmt_info, 0, vect_body);
11877 }
11878 else
11879 {
11880 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11881 gsi, vf, false, &n_perms,
11882 nullptr, true);
11883 gcc_assert (ok);
11884 }
11885 }
11886 else
11887 {
11888 if (grouped_load)
11889 {
11890 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11891 /* We assume that the cost of a single load-lanes instruction
11892 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11893 If a grouped access is instead being provided by a
11894 load-and-permute operation, include the cost of the
11895 permutes. */
11896 if (costing_p && first_stmt_info == stmt_info)
11897 {
11898 /* Uses even and odd extract operations or shuffle
11899 operations for each needed permute. */
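/* E.g. a group of size 4 is counted as ceil_log2 (4) * 4 = 8
permute statements below. */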
11900 int group_size = DR_GROUP_SIZE (first_stmt_info);
11901 int nstmts = ceil_log2 (group_size) * group_size;
11902 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11903 stmt_info, 0, vect_body);
11904
11905 if (dump_enabled_p ())
11906 dump_printf_loc (MSG_NOTE, vect_location,
11907 "vect_model_load_cost:"
11908 "strided group_size = %d .\n",
11909 group_size);
11910 }
11911 else if (!costing_p)
11912 {
11913 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11914 group_size, gsi);
11915 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11916 }
11917 }
11918 else if (!costing_p)
11919 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11920 }
11921 dr_chain.release ();
11922 }
11923 if (!slp && !costing_p)
11924 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11925
11926 if (costing_p)
11927 {
11928 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11929 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11930 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11931 if (n_adjacent_loads > 0)
11932 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11933 alignment_support_scheme, misalignment, false,
11934 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11935 true);
11936 if (dump_enabled_p ())
11937 dump_printf_loc (MSG_NOTE, vect_location,
11938 "vect_model_load_cost: inside_cost = %u, "
11939 "prologue_cost = %u .\n",
11940 inside_cost, prologue_cost);
11941 }
11942
11943 return true;
11944 }
11945
11946 /* Function vect_is_simple_cond.
11947
11948 Input:
11949 VINFO - the vec_info for the loop or basic block being vectorized.
11950 COND - Condition that is checked for simple use.
11951
11952 Output:
11953 *COMP_VECTYPE - the vector type for the comparison.
11954 *DTS - The def types for the arguments of the comparison.
11955
11956 Returns whether COND can be vectorized. Checks whether the
11957 condition operands are supportable using vect_is_simple_use. */
11958
11959 static bool
11960 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11961 slp_tree slp_node, tree *comp_vectype,
11962 enum vect_def_type *dts, tree vectype)
11963 {
11964 tree lhs, rhs;
11965 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11966 slp_tree slp_op;
11967
11968 /* Mask case. */
11969 if (TREE_CODE (cond) == SSA_NAME
11970 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11971 {
11972 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11973 &slp_op, &dts[0], comp_vectype)
11974 || !*comp_vectype
11975 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11976 return false;
11977 return true;
11978 }
11979
11980 if (!COMPARISON_CLASS_P (cond))
11981 return false;
11982
11983 lhs = TREE_OPERAND (cond, 0);
11984 rhs = TREE_OPERAND (cond, 1);
11985
11986 if (TREE_CODE (lhs) == SSA_NAME)
11987 {
11988 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11989 &lhs, &slp_op, &dts[0], &vectype1))
11990 return false;
11991 }
11992 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11993 || TREE_CODE (lhs) == FIXED_CST)
11994 dts[0] = vect_constant_def;
11995 else
11996 return false;
11997
11998 if (TREE_CODE (rhs) == SSA_NAME)
11999 {
12000 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
12001 &rhs, &slp_op, &dts[1], &vectype2))
12002 return false;
12003 }
12004 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12005 || TREE_CODE (rhs) == FIXED_CST)
12006 dts[1] = vect_constant_def;
12007 else
12008 return false;
12009
12010 if (vectype1 && vectype2
12011 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12012 TYPE_VECTOR_SUBPARTS (vectype2)))
12013 return false;
12014
12015 *comp_vectype = vectype1 ? vectype1 : vectype2;
12016 /* Invariant comparison. */
12017 if (! *comp_vectype)
12018 {
12019 tree scalar_type = TREE_TYPE (lhs);
12020 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12021 *comp_vectype = truth_type_for (vectype);
12022 else
12023 {
12024 /* If we can widen the comparison to match vectype do so. */
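/* E.g. a comparison of two chars feeding an int-valued COND_EXPR
is widened to a 32-bit comparison type here so that comp_vectype
ends up with the same number of lanes as vectype. */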
12025 if (INTEGRAL_TYPE_P (scalar_type)
12026 && !slp_node
12027 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12028 TYPE_SIZE (TREE_TYPE (vectype))))
12029 scalar_type = build_nonstandard_integer_type
12030 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12031 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12032 slp_node);
12033 }
12034 }
12035
12036 return true;
12037 }
12038
12039 /* vectorizable_condition.
12040
12041 Check if STMT_INFO is a conditional modify expression that can be vectorized.
12042 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12043 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12044 at GSI.
12045
12046 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12047
12048 Return true if STMT_INFO is vectorizable in this way. */
12049
12050 static bool
12051 vectorizable_condition (vec_info *vinfo,
12052 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12053 gimple **vec_stmt,
12054 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12055 {
12056 tree scalar_dest = NULL_TREE;
12057 tree vec_dest = NULL_TREE;
12058 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12059 tree then_clause, else_clause;
12060 tree comp_vectype = NULL_TREE;
12061 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12062 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12063 tree vec_compare;
12064 tree new_temp;
12065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12066 enum vect_def_type dts[4]
12067 = {vect_unknown_def_type, vect_unknown_def_type,
12068 vect_unknown_def_type, vect_unknown_def_type};
12069 int ndts = 4;
12070 int ncopies;
12071 int vec_num;
12072 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12073 int i;
12074 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12075 vec<tree> vec_oprnds0 = vNULL;
12076 vec<tree> vec_oprnds1 = vNULL;
12077 vec<tree> vec_oprnds2 = vNULL;
12078 vec<tree> vec_oprnds3 = vNULL;
12079 tree vec_cmp_type;
12080 bool masked = false;
12081
12082 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12083 return false;
12084
12085 /* Is this a vectorizable conditional operation? */
12086 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12087 if (!stmt)
12088 return false;
12089
12090 code = gimple_assign_rhs_code (stmt);
12091 if (code != COND_EXPR)
12092 return false;
12093
12094 stmt_vec_info reduc_info = NULL;
12095 int reduc_index = -1;
12096 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12097 bool for_reduction
12098 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
12099 if (for_reduction)
12100 {
12101 if (slp_node)
12102 return false;
12103 reduc_info = info_for_reduction (vinfo, stmt_info);
12104 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
12105 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
12106 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
12107 || reduc_index != -1);
12108 }
12109 else
12110 {
12111 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12112 return false;
12113 }
12114
12115 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12116 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12117
12118 if (slp_node)
12119 {
12120 ncopies = 1;
12121 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12122 }
12123 else
12124 {
12125 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12126 vec_num = 1;
12127 }
12128
12129 gcc_assert (ncopies >= 1);
12130 if (for_reduction && ncopies > 1)
12131 return false; /* FORNOW */
12132
12133 cond_expr = gimple_assign_rhs1 (stmt);
12134
12135 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12136 &comp_vectype, &dts[0], vectype)
12137 || !comp_vectype)
12138 return false;
12139
12140 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12141 slp_tree then_slp_node, else_slp_node;
12142 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12143 &then_clause, &then_slp_node, &dts[2], &vectype1))
12144 return false;
12145 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12146 &else_clause, &else_slp_node, &dts[3], &vectype2))
12147 return false;
12148
12149 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12150 return false;
12151
12152 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12153 return false;
12154
12155 masked = !COMPARISON_CLASS_P (cond_expr);
12156 vec_cmp_type = truth_type_for (comp_vectype);
12157
12158 if (vec_cmp_type == NULL_TREE)
12159 return false;
12160
12161 cond_code = TREE_CODE (cond_expr);
12162 if (!masked)
12163 {
12164 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12165 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12166 }
12167
12168 /* For conditional reductions, the "then" value needs to be the candidate
12169 value calculated by this iteration while the "else" value needs to be
12170 the result carried over from previous iterations. If the COND_EXPR
12171 is the other way around, we need to swap it. */
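/* E.g. if the scalar statement computes r = cond ? r : a[i], the
carried value r is the "then" clause (reduc_index == 1); we invert
COND (or later the mask) and swap the clauses so that the new
candidate a[i] becomes the "then" value. */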
12172 bool must_invert_cmp_result = false;
12173 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12174 {
12175 if (masked)
12176 must_invert_cmp_result = true;
12177 else
12178 {
12179 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12180 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12181 if (new_code == ERROR_MARK)
12182 must_invert_cmp_result = true;
12183 else
12184 {
12185 cond_code = new_code;
12186 /* Make sure we don't accidentally use the old condition. */
12187 cond_expr = NULL_TREE;
12188 }
12189 }
12190 std::swap (then_clause, else_clause);
12191 }
12192
12193 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12194 {
12195 /* Boolean values may have another representation in vectors
12196 and therefore we prefer bit operations over comparison for
12197 them (which also works for scalar masks). We store opcodes
12198 to use in bitop1 and bitop2. Statement is vectorized as
12199 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12200 depending on bitop1 and bitop2 arity. */
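/* E.g. on boolean operands a > b is emitted as a & ~b (bitop1 =
BIT_NOT_EXPR applied to the rhs, bitop2 = BIT_AND_EXPR combining
it with the lhs) and a == b becomes ~(a ^ b). */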
12201 switch (cond_code)
12202 {
12203 case GT_EXPR:
12204 bitop1 = BIT_NOT_EXPR;
12205 bitop2 = BIT_AND_EXPR;
12206 break;
12207 case GE_EXPR:
12208 bitop1 = BIT_NOT_EXPR;
12209 bitop2 = BIT_IOR_EXPR;
12210 break;
12211 case LT_EXPR:
12212 bitop1 = BIT_NOT_EXPR;
12213 bitop2 = BIT_AND_EXPR;
12214 std::swap (cond_expr0, cond_expr1);
12215 break;
12216 case LE_EXPR:
12217 bitop1 = BIT_NOT_EXPR;
12218 bitop2 = BIT_IOR_EXPR;
12219 std::swap (cond_expr0, cond_expr1);
12220 break;
12221 case NE_EXPR:
12222 bitop1 = BIT_XOR_EXPR;
12223 break;
12224 case EQ_EXPR:
12225 bitop1 = BIT_XOR_EXPR;
12226 bitop2 = BIT_NOT_EXPR;
12227 break;
12228 default:
12229 return false;
12230 }
12231 cond_code = SSA_NAME;
12232 }
12233
12234 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12235 && reduction_type == EXTRACT_LAST_REDUCTION
12236 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12237 {
12238 if (dump_enabled_p ())
12239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12240 "reduction comparison operation not supported.\n");
12241 return false;
12242 }
12243
12244 if (!vec_stmt)
12245 {
12246 if (bitop1 != NOP_EXPR)
12247 {
12248 machine_mode mode = TYPE_MODE (comp_vectype);
12249 optab optab;
12250
12251 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12252 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12253 return false;
12254
12255 if (bitop2 != NOP_EXPR)
12256 {
12257 optab = optab_for_tree_code (bitop2, comp_vectype,
12258 optab_default);
12259 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12260 return false;
12261 }
12262 }
12263
12264 vect_cost_for_stmt kind = vector_stmt;
12265 if (reduction_type == EXTRACT_LAST_REDUCTION)
12266 /* Count one reduction-like operation per vector. */
12267 kind = vec_to_scalar;
12268 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12269 && (masked
12270 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12271 cond_code)
12272 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12273 ERROR_MARK))))
12274 return false;
12275
12276 if (slp_node
12277 && (!vect_maybe_update_slp_op_vectype
12278 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12279 || (op_adjust == 1
12280 && !vect_maybe_update_slp_op_vectype
12281 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12282 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12283 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12284 {
12285 if (dump_enabled_p ())
12286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12287 "incompatible vector types for invariants\n");
12288 return false;
12289 }
12290
12291 if (loop_vinfo && for_reduction
12292 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12293 {
12294 if (reduction_type == EXTRACT_LAST_REDUCTION)
12295 {
12296 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12297 vectype, OPTIMIZE_FOR_SPEED))
12298 vect_record_loop_len (loop_vinfo,
12299 &LOOP_VINFO_LENS (loop_vinfo),
12300 ncopies * vec_num, vectype, 1);
12301 else
12302 vect_record_loop_mask (loop_vinfo,
12303 &LOOP_VINFO_MASKS (loop_vinfo),
12304 ncopies * vec_num, vectype, NULL);
12305 }
12306 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12307 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12308 {
12309 if (dump_enabled_p ())
12310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12311 "conditional reduction prevents the use"
12312 " of partial vectors.\n");
12313 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12314 }
12315 }
12316
12317 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12318 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12319 cost_vec, kind);
12320 return true;
12321 }
12322
12323 /* Transform. */
12324
12325 /* Handle def. */
12326 scalar_dest = gimple_assign_lhs (stmt);
12327 if (reduction_type != EXTRACT_LAST_REDUCTION)
12328 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12329
12330 bool swap_cond_operands = false;
12331
12332 /* See whether another part of the vectorized code applies a loop
12333 mask to the condition, or to its inverse. */
12334
12335 vec_loop_masks *masks = NULL;
12336 vec_loop_lens *lens = NULL;
12337 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12338 {
12339 if (reduction_type == EXTRACT_LAST_REDUCTION)
12340 lens = &LOOP_VINFO_LENS (loop_vinfo);
12341 }
12342 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12343 {
12344 if (reduction_type == EXTRACT_LAST_REDUCTION)
12345 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12346 else
12347 {
12348 scalar_cond_masked_key cond (cond_expr, ncopies);
12349 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12350 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12351 else
12352 {
12353 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12354 tree_code orig_code = cond.code;
12355 cond.code = invert_tree_comparison (cond.code, honor_nans);
12356 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12357 {
12358 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12359 cond_code = cond.code;
12360 swap_cond_operands = true;
12361 }
12362 else
12363 {
12364 /* Try the inverse of the current mask. We check if the
12365 inverse mask is live and if so we generate a negate of
12366 the current mask such that we still honor NaNs. */
12367 cond.inverted_p = true;
12368 cond.code = orig_code;
12369 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12370 {
12371 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12372 cond_code = cond.code;
12373 swap_cond_operands = true;
12374 must_invert_cmp_result = true;
12375 }
12376 }
12377 }
12378 }
12379 }
12380
12381 /* Handle cond expr. */
12382 if (masked)
12383 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12384 cond_expr, comp_vectype, &vec_oprnds0,
12385 then_clause, vectype, &vec_oprnds2,
12386 reduction_type != EXTRACT_LAST_REDUCTION
12387 ? else_clause : NULL, vectype, &vec_oprnds3);
12388 else
12389 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12390 cond_expr0, comp_vectype, &vec_oprnds0,
12391 cond_expr1, comp_vectype, &vec_oprnds1,
12392 then_clause, vectype, &vec_oprnds2,
12393 reduction_type != EXTRACT_LAST_REDUCTION
12394 ? else_clause : NULL, vectype, &vec_oprnds3);
12395
12396 /* Arguments are ready. Create the new vector stmt. */
12397 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12398 {
12399 vec_then_clause = vec_oprnds2[i];
12400 if (reduction_type != EXTRACT_LAST_REDUCTION)
12401 vec_else_clause = vec_oprnds3[i];
12402
12403 if (swap_cond_operands)
12404 std::swap (vec_then_clause, vec_else_clause);
12405
12406 if (masked)
12407 vec_compare = vec_cond_lhs;
12408 else
12409 {
12410 vec_cond_rhs = vec_oprnds1[i];
12411 if (bitop1 == NOP_EXPR)
12412 {
12413 gimple_seq stmts = NULL;
12414 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12415 vec_cond_lhs, vec_cond_rhs);
12416 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12417 }
12418 else
12419 {
12420 new_temp = make_ssa_name (vec_cmp_type);
12421 gassign *new_stmt;
12422 if (bitop1 == BIT_NOT_EXPR)
12423 new_stmt = gimple_build_assign (new_temp, bitop1,
12424 vec_cond_rhs);
12425 else
12426 new_stmt
12427 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12428 vec_cond_rhs);
12429 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12430 if (bitop2 == NOP_EXPR)
12431 vec_compare = new_temp;
12432 else if (bitop2 == BIT_NOT_EXPR
12433 && reduction_type != EXTRACT_LAST_REDUCTION)
12434 {
12435 /* Instead of doing ~x ? y : z do x ? z : y. */
12436 vec_compare = new_temp;
12437 std::swap (vec_then_clause, vec_else_clause);
12438 }
12439 else
12440 {
12441 vec_compare = make_ssa_name (vec_cmp_type);
12442 if (bitop2 == BIT_NOT_EXPR)
12443 new_stmt
12444 = gimple_build_assign (vec_compare, bitop2, new_temp);
12445 else
12446 new_stmt
12447 = gimple_build_assign (vec_compare, bitop2,
12448 vec_cond_lhs, new_temp);
12449 vect_finish_stmt_generation (vinfo, stmt_info,
12450 new_stmt, gsi);
12451 }
12452 }
12453 }
12454
12455 /* If we decided to apply a loop mask to the result of the vector
12456 comparison, AND the comparison with the mask now. Later passes
12457 should then be able to reuse the AND results between multiple
12458 vector statements.
12459
12460 For example:
12461 for (int i = 0; i < 100; ++i)
12462 x[i] = y[i] ? z[i] : 10;
12463
12464 results in following optimized GIMPLE:
12465
12466 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12467 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12468 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12469 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12470 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12471 vect_iftmp.11_47, { 10, ... }>;
12472
12473 instead of using masked and unmasked forms of
12474 vec != { 0, ... } (masked in the MASK_LOAD,
12475 unmasked in the VEC_COND_EXPR). */
12476
12477 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12478 in cases where that's necessary. */
12479
12480 tree len = NULL_TREE, bias = NULL_TREE;
12481 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12482 {
12483 if (!is_gimple_val (vec_compare))
12484 {
12485 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12486 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12487 vec_compare);
12488 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12489 vec_compare = vec_compare_name;
12490 }
12491
12492 if (must_invert_cmp_result)
12493 {
12494 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12495 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12496 BIT_NOT_EXPR,
12497 vec_compare);
12498 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12499 vec_compare = vec_compare_name;
12500 }
12501
12502 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12503 vectype, OPTIMIZE_FOR_SPEED))
12504 {
12505 if (lens)
12506 {
12507 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12508 vec_num * ncopies, vectype, i, 1);
12509 signed char biasval
12510 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12511 bias = build_int_cst (intQI_type_node, biasval);
12512 }
12513 else
12514 {
12515 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12516 bias = build_int_cst (intQI_type_node, 0);
12517 }
12518 }
12519 if (masks)
12520 {
12521 tree loop_mask
12522 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12523 vectype, i);
12524 tree tmp2 = make_ssa_name (vec_cmp_type);
12525 gassign *g
12526 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12527 loop_mask);
12528 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12529 vec_compare = tmp2;
12530 }
12531 }
12532
12533 gimple *new_stmt;
12534 if (reduction_type == EXTRACT_LAST_REDUCTION)
12535 {
12536 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12537 tree lhs = gimple_get_lhs (old_stmt);
12538 if (len)
12539 new_stmt = gimple_build_call_internal
12540 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12541 vec_then_clause, len, bias);
12542 else
12543 new_stmt = gimple_build_call_internal
12544 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12545 vec_then_clause);
12546 gimple_call_set_lhs (new_stmt, lhs);
12547 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12548 if (old_stmt == gsi_stmt (*gsi))
12549 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12550 else
12551 {
12552 /* In this case we're moving the definition to later in the
12553 block. That doesn't matter because the only uses of the
12554 lhs are in phi statements. */
12555 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12556 gsi_remove (&old_gsi, true);
12557 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12558 }
12559 }
12560 else
12561 {
12562 new_temp = make_ssa_name (vec_dest);
12563 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12564 vec_then_clause, vec_else_clause);
12565 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12566 }
12567 if (slp_node)
12568 slp_node->push_vec_def (new_stmt);
12569 else
12570 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12571 }
12572
12573 if (!slp_node)
12574 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12575
12576 vec_oprnds0.release ();
12577 vec_oprnds1.release ();
12578 vec_oprnds2.release ();
12579 vec_oprnds3.release ();
12580
12581 return true;
12582 }
12583
12584 /* Helper of vectorizable_comparison.
12585
12586 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12587 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12588 comparison, put it in VEC_STMT, and insert it at GSI.
12589
12590 Return true if STMT_INFO is vectorizable in this way. */
12591
12592 static bool
12593 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12594 stmt_vec_info stmt_info, tree_code code,
12595 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12596 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12597 {
12598 tree lhs, rhs1, rhs2;
12599 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12600 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12601 tree new_temp;
12602 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12603 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12604 int ndts = 2;
12605 poly_uint64 nunits;
12606 int ncopies;
12607 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12608 int i;
12609 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12610 vec<tree> vec_oprnds0 = vNULL;
12611 vec<tree> vec_oprnds1 = vNULL;
12612 tree mask_type;
12613 tree mask = NULL_TREE;
12614
12615 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12616 return false;
12617
12618 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12619 return false;
12620
12621 mask_type = vectype;
12622 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12623
12624 if (slp_node)
12625 ncopies = 1;
12626 else
12627 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12628
12629 gcc_assert (ncopies >= 1);
12630
12631 if (TREE_CODE_CLASS (code) != tcc_comparison)
12632 return false;
12633
12634 slp_tree slp_rhs1, slp_rhs2;
12635 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12636 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12637 return false;
12638
12639 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12640 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12641 return false;
12642
12643 if (vectype1 && vectype2
12644 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12645 TYPE_VECTOR_SUBPARTS (vectype2)))
12646 return false;
12647
12648 vectype = vectype1 ? vectype1 : vectype2;
12649
12650 /* Invariant comparison. */
12651 if (!vectype)
12652 {
12653 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12654 vectype = mask_type;
12655 else
12656 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12657 slp_node);
12658 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12659 return false;
12660 }
12661 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12662 return false;
12663
12664 /* Can't compare mask and non-mask types. */
12665 if (vectype1 && vectype2
12666 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12667 return false;
12668
12669 /* Boolean values may have another representation in vectors
12670 and therefore we prefer bit operations over comparison for
12671 them (which also works for scalar masks). We store opcodes
12672 to use in bitop1 and bitop2. Statement is vectorized as
12673 BITOP2 (rhs1 BITOP1 rhs2) or
12674 rhs1 BITOP2 (BITOP1 rhs2)
12675 depending on bitop1 and bitop2 arity. */
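/* E.g. on booleans a > b becomes a & ~b and a == b becomes ~(a ^ b). */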
12676 bool swap_p = false;
12677 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12678 {
12679 if (code == GT_EXPR)
12680 {
12681 bitop1 = BIT_NOT_EXPR;
12682 bitop2 = BIT_AND_EXPR;
12683 }
12684 else if (code == GE_EXPR)
12685 {
12686 bitop1 = BIT_NOT_EXPR;
12687 bitop2 = BIT_IOR_EXPR;
12688 }
12689 else if (code == LT_EXPR)
12690 {
12691 bitop1 = BIT_NOT_EXPR;
12692 bitop2 = BIT_AND_EXPR;
12693 swap_p = true;
12694 }
12695 else if (code == LE_EXPR)
12696 {
12697 bitop1 = BIT_NOT_EXPR;
12698 bitop2 = BIT_IOR_EXPR;
12699 swap_p = true;
12700 }
12701 else
12702 {
12703 bitop1 = BIT_XOR_EXPR;
12704 if (code == EQ_EXPR)
12705 bitop2 = BIT_NOT_EXPR;
12706 }
12707 }
12708
12709 if (!vec_stmt)
12710 {
12711 if (bitop1 == NOP_EXPR)
12712 {
12713 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12714 return false;
12715 }
12716 else
12717 {
12718 machine_mode mode = TYPE_MODE (vectype);
12719 optab optab;
12720
12721 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12722 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12723 return false;
12724
12725 if (bitop2 != NOP_EXPR)
12726 {
12727 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12728 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12729 return false;
12730 }
12731 }
12732
12733 /* Put types on constant and invariant SLP children. */
12734 if (slp_node
12735 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12736 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12737 {
12738 if (dump_enabled_p ())
12739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12740 "incompatible vector types for invariants\n");
12741 return false;
12742 }
12743
12744 vect_model_simple_cost (vinfo, stmt_info,
12745 ncopies * (1 + (bitop2 != NOP_EXPR)),
12746 dts, ndts, slp_node, cost_vec);
12747 return true;
12748 }
12749
12750 /* Transform. */
12751
12752 /* Handle def. */
12753 lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12754 if (lhs)
12755 mask = vect_create_destination_var (lhs, mask_type);
12756
12757 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12758 rhs1, vectype, &vec_oprnds0,
12759 rhs2, vectype, &vec_oprnds1);
12760 if (swap_p)
12761 std::swap (vec_oprnds0, vec_oprnds1);
12762
12763 /* Arguments are ready. Create the new vector stmt. */
12764 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12765 {
12766 gimple *new_stmt;
12767 vec_rhs2 = vec_oprnds1[i];
12768
12769 if (lhs)
12770 new_temp = make_ssa_name (mask);
12771 else
12772 new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12773 if (bitop1 == NOP_EXPR)
12774 {
12775 new_stmt = gimple_build_assign (new_temp, code,
12776 vec_rhs1, vec_rhs2);
12777 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12778 }
12779 else
12780 {
12781 if (bitop1 == BIT_NOT_EXPR)
12782 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12783 else
12784 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12785 vec_rhs2);
12786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12787 if (bitop2 != NOP_EXPR)
12788 {
12789 tree res = make_ssa_name (mask);
12790 if (bitop2 == BIT_NOT_EXPR)
12791 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12792 else
12793 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12794 new_temp);
12795 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12796 }
12797 }
12798 if (slp_node)
12799 slp_node->push_vec_def (new_stmt);
12800 else
12801 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12802 }
12803
12804 if (!slp_node)
12805 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12806
12807 vec_oprnds0.release ();
12808 vec_oprnds1.release ();
12809
12810 return true;
12811 }
12812
12813 /* vectorizable_comparison.
12814
12815 Check if STMT_INFO is comparison expression that can be vectorized.
12816 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12817 comparison, put it in VEC_STMT, and insert it at GSI.
12818
12819 Return true if STMT_INFO is vectorizable in this way. */
12820
12821 static bool
12822 vectorizable_comparison (vec_info *vinfo,
12823 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12824 gimple **vec_stmt,
12825 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12826 {
12827 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12828
12829 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12830 return false;
12831
12832 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12833 return false;
12834
12835 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12836 if (!stmt)
12837 return false;
12838
12839 enum tree_code code = gimple_assign_rhs_code (stmt);
12840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12841 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12842 vec_stmt, slp_node, cost_vec))
12843 return false;
12844
12845 if (!vec_stmt)
12846 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12847
12848 return true;
12849 }
12850
12851 /* Check to see if the current early break given in STMT_INFO is valid for
12852 vectorization. */
12853
12854 static bool
12855 vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
12856 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12857 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12858 {
12859 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12860 if (!loop_vinfo
12861 || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
12862 return false;
12863
12864 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
12865 return false;
12866
12867 if (!STMT_VINFO_RELEVANT_P (stmt_info))
12868 return false;
12869
12870 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12871
12872 auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
12873
12874 tree vectype = NULL_TREE;
12875 slp_tree slp_op0;
12876 tree op0;
12877 enum vect_def_type dt0;
12878 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
12879 &vectype))
12880 {
12881 if (dump_enabled_p ())
12882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12883 "use not simple.\n");
12884 return false;
12885 }
12886
12887 if (!vectype)
12888 return false;
12889
12890 machine_mode mode = TYPE_MODE (vectype);
12891 int ncopies;
12892
12893 if (slp_node)
12894 ncopies = 1;
12895 else
12896 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12897
12898 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
12899 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
12900 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12901 bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
12902
12903 /* Now build the new conditional. Pattern gimple_conds get dropped during
12904 codegen so we must replace the original insn. */
12905 gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
12906 gcond *cond_stmt = as_a <gcond *>(orig_stmt);
12907 /* When vectorizing we assume that if the branch edge is taken we're
12908 exiting the loop. This is not always the case, however, as the compiler
12909 will rewrite conditions to always be a comparison against 0. To do this
12910 it sometimes flips the edges. This is fine for scalar code, but for
12911 vector code we then have to flip the test, as we're still assuming that
12912 taking the branch edge means we found the exit condition, i.e. we need to
12913 know whether we are generating a `forall` or an `exist` condition. */
12914 auto new_code = NE_EXPR;
12915 auto reduc_optab = ior_optab;
12916 auto reduc_op = BIT_IOR_EXPR;
12917 tree cst = build_zero_cst (vectype);
12918 edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
12919 if (exit_true_edge->flags & EDGE_FALSE_VALUE)
12920 exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
12921 gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
12922 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12923 exit_true_edge->dest))
12924 {
12925 new_code = EQ_EXPR;
12926 reduc_optab = and_optab;
12927 reduc_op = BIT_AND_EXPR;
12928 cst = build_minus_one_cst (vectype);
12929 }
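/* I.e. by default we OR the per-lane compare results and take the
exit branch if any lane matched (result != { 0, ... }, an `exist`
test); when the true edge stays inside the loop we AND them and
stay only while every lane is still true (result == { -1, ... },
a `forall` test). */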
12930
12931 /* Analyze only. */
12932 if (!vec_stmt)
12933 {
12934 if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
12935 {
12936 if (dump_enabled_p ())
12937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12938 "can't vectorize early exit because the "
12939 "target doesn't support flag setting vector "
12940 "comparisons.\n");
12941 return false;
12942 }
12943
12944 if (ncopies > 1
12945 && direct_optab_handler (reduc_optab, mode) == CODE_FOR_nothing)
12946 {
12947 if (dump_enabled_p ())
12948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12949 "can't vectorize early exit because the "
12950 "target does not support boolean vector %s "
12951 "for type %T.\n",
12952 reduc_optab == ior_optab ? "OR" : "AND",
12953 vectype);
12954 return false;
12955 }
12956
12957 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12958 vec_stmt, slp_node, cost_vec))
12959 return false;
12960
12961 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12962 {
12963 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
12964 OPTIMIZE_FOR_SPEED))
12965 vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
12966 else
12967 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
12968 }
12969
12970 return true;
12971 }
12972
12973 /* Transform. */
12974
12975 tree new_temp = NULL_TREE;
12976 gimple *new_stmt = NULL;
12977
12978 if (dump_enabled_p ())
12979 dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
12980
12981 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12982 vec_stmt, slp_node, cost_vec))
12983 gcc_unreachable ();
12984
12985 gimple *stmt = STMT_VINFO_STMT (stmt_info);
12986 basic_block cond_bb = gimple_bb (stmt);
12987 gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
12988
12989 auto_vec<tree> stmts;
12990
12991 if (slp_node)
12992 stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
12993 else
12994 {
12995 auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
12996 stmts.reserve_exact (vec_stmts.length ());
12997 for (auto stmt : vec_stmts)
12998 stmts.quick_push (gimple_assign_lhs (stmt));
12999 }
13000
13001 /* Determine if we need to reduce the final value. */
13002 if (stmts.length () > 1)
13003 {
13004 /* We build the reductions in a way to maintain as much parallelism as
13005 possible. */
13006 auto_vec<tree> workset (stmts.length ());
13007
13008 /* Mask the statements as we queue them up. Normally we loop over
13009 vec_num, but since we inspect the exact results of vectorization
13010 we don't need to and instead can just use the stmts themselves. */
13011 if (masked_loop_p)
13012 for (unsigned i = 0; i < stmts.length (); i++)
13013 {
13014 tree stmt_mask
13015 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
13016 i);
13017 stmt_mask
13018 = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
13019 stmts[i], &cond_gsi);
13020 workset.quick_push (stmt_mask);
13021 }
13022 else if (len_loop_p)
13023 for (unsigned i = 0; i < stmts.length (); i++)
13024 {
13025 tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
13026 lens, ncopies, vectype,
13027 stmts[i], i, 1);
13028
13029 workset.quick_push (len_mask);
13030 }
13031 else
13032 workset.splice (stmts);
13033
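/* Reduce pairwise from the end of the workset so that, e.g., for
four masks m0..m3 we combine m3 with m2 and m1 with m0
independently and only then combine the two partial results,
giving a log-depth reduction tree rather than a linear chain. */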
13034 while (workset.length () > 1)
13035 {
13036 new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
13037 tree arg0 = workset.pop ();
13038 tree arg1 = workset.pop ();
13039 new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
13040 vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
13041 &cond_gsi);
13042 workset.quick_insert (0, new_temp);
13043 }
13044 }
13045 else
13046 {
13047 new_temp = stmts[0];
13048 if (masked_loop_p)
13049 {
13050 tree mask
13051 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
13052 new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13053 new_temp, &cond_gsi);
13054 }
13055 else if (len_loop_p)
13056 new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
13057 ncopies, vectype, new_temp, 0, 1);
13058 }
13059
13060 gcc_assert (new_temp);
13061
13062 gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
13063 update_stmt (orig_stmt);
13064
13065 if (slp_node)
13066 SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13067 else
13068 STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
13069
13070 if (!slp_node)
13071 *vec_stmt = orig_stmt;
13072
13073 return true;
13074 }
13075
13076 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13077 can handle all live statements in the node. Otherwise return true
13078 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13079 VEC_STMT_P is as for vectorizable_live_operation. */
13080
13081 static bool
13082 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
13083 slp_tree slp_node, slp_instance slp_node_instance,
13084 bool vec_stmt_p,
13085 stmt_vector_for_cost *cost_vec)
13086 {
13087 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13088 if (slp_node)
13089 {
13090 stmt_vec_info slp_stmt_info;
13091 unsigned int i;
13092 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13093 {
13094 if ((STMT_VINFO_LIVE_P (slp_stmt_info)
13095 || (loop_vinfo
13096 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13097 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
13098 == vect_induction_def))
13099 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13100 slp_node_instance, i,
13101 vec_stmt_p, cost_vec))
13102 return false;
13103 }
13104 }
13105 else if ((STMT_VINFO_LIVE_P (stmt_info)
13106 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13107 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
13108 && !vectorizable_live_operation (vinfo, stmt_info,
13109 slp_node, slp_node_instance, -1,
13110 vec_stmt_p, cost_vec))
13111 return false;
13112
13113 return true;
13114 }
13115
13116 /* Make sure the statement is vectorizable. */
13117
13118 opt_result
13119 vect_analyze_stmt (vec_info *vinfo,
13120 stmt_vec_info stmt_info, bool *need_to_vectorize,
13121 slp_tree node, slp_instance node_instance,
13122 stmt_vector_for_cost *cost_vec)
13123 {
13124 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13125 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13126 bool ok;
13127 gimple_seq pattern_def_seq;
13128
13129 if (dump_enabled_p ())
13130 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13131 stmt_info->stmt);
13132
13133 if (gimple_has_volatile_ops (stmt_info->stmt))
13134 return opt_result::failure_at (stmt_info->stmt,
13135 "not vectorized:"
13136 " stmt has volatile operands: %G\n",
13137 stmt_info->stmt);
13138
13139 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13140 && node == NULL
13141 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
13142 {
13143 gimple_stmt_iterator si;
13144
13145 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
13146 {
13147 stmt_vec_info pattern_def_stmt_info
13148 = vinfo->lookup_stmt (gsi_stmt (si));
13149 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
13150 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
13151 {
13152 /* Analyze def stmt of STMT if it's a pattern stmt. */
13153 if (dump_enabled_p ())
13154 dump_printf_loc (MSG_NOTE, vect_location,
13155 "==> examining pattern def statement: %G",
13156 pattern_def_stmt_info->stmt);
13157
13158 opt_result res
13159 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
13160 need_to_vectorize, node, node_instance,
13161 cost_vec);
13162 if (!res)
13163 return res;
13164 }
13165 }
13166 }
13167
13168 /* Skip stmts that do not need to be vectorized. In loops this is expected
13169 to include:
13170 - the COND_EXPR which is the loop exit condition
13171 - any LABEL_EXPRs in the loop
13172 - computations that are used only for array indexing or loop control.
13173 In basic blocks we only analyze statements that are a part of some SLP
13174 instance, therefore, all the statements are relevant.
13175
13176 A pattern statement needs to be analyzed instead of the original statement
13177 if the original statement is not relevant. Otherwise, we analyze both
13178 statements. In basic blocks we are called from some SLP instance
13179 traversal; don't analyze pattern stmts there, as the pattern stmts
13180 will already be part of an SLP instance. */
13181
13182 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
13183 if (!STMT_VINFO_RELEVANT_P (stmt_info)
13184 && !STMT_VINFO_LIVE_P (stmt_info))
13185 {
13186 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13187 && pattern_stmt_info
13188 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13189 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13190 {
13191 /* Analyze PATTERN_STMT instead of the original stmt. */
13192 stmt_info = pattern_stmt_info;
13193 if (dump_enabled_p ())
13194 dump_printf_loc (MSG_NOTE, vect_location,
13195 "==> examining pattern statement: %G",
13196 stmt_info->stmt);
13197 }
13198 else
13199 {
13200 if (dump_enabled_p ())
13201 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13202
13203 return opt_result::success ();
13204 }
13205 }
13206 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13207 && node == NULL
13208 && pattern_stmt_info
13209 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13210 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13211 {
13212 /* Analyze PATTERN_STMT too. */
13213 if (dump_enabled_p ())
13214 dump_printf_loc (MSG_NOTE, vect_location,
13215 "==> examining pattern statement: %G",
13216 pattern_stmt_info->stmt);
13217
13218 opt_result res
13219 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
13220 node_instance, cost_vec);
13221 if (!res)
13222 return res;
13223 }
13224
13225 switch (STMT_VINFO_DEF_TYPE (stmt_info))
13226 {
13227 case vect_internal_def:
13228 case vect_condition_def:
13229 break;
13230
13231 case vect_reduction_def:
13232 case vect_nested_cycle:
13233 gcc_assert (!bb_vinfo
13234 && (relevance == vect_used_in_outer
13235 || relevance == vect_used_in_outer_by_reduction
13236 || relevance == vect_used_by_reduction
13237 || relevance == vect_unused_in_scope
13238 || relevance == vect_used_only_live));
13239 break;
13240
13241 case vect_induction_def:
13242 case vect_first_order_recurrence:
13243 gcc_assert (!bb_vinfo);
13244 break;
13245
13246 case vect_constant_def:
13247 case vect_external_def:
13248 case vect_unknown_def_type:
13249 default:
13250 gcc_unreachable ();
13251 }
13252
13253 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13254 if (node)
13255 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
13256
13257 if (STMT_VINFO_RELEVANT_P (stmt_info))
13258 {
13259 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13260 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
13261 || gimple_code (stmt_info->stmt) == GIMPLE_COND
13262 || (call && gimple_call_lhs (call) == NULL_TREE));
13263 *need_to_vectorize = true;
13264 }
13265
13266 if (PURE_SLP_STMT (stmt_info) && !node)
13267 {
13268 if (dump_enabled_p ())
13269 dump_printf_loc (MSG_NOTE, vect_location,
13270 "handled only by SLP analysis\n");
13271 return opt_result::success ();
13272 }
13273
13274 ok = true;
13275 if (!bb_vinfo
13276 && (STMT_VINFO_RELEVANT_P (stmt_info)
13277 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13278 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13279 -mveclibabi= takes preference over library functions with
13280 the simd attribute. */
13281 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13282 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
13283 cost_vec)
13284 || vectorizable_conversion (vinfo, stmt_info,
13285 NULL, NULL, node, cost_vec)
13286 || vectorizable_operation (vinfo, stmt_info,
13287 NULL, NULL, node, cost_vec)
13288 || vectorizable_assignment (vinfo, stmt_info,
13289 NULL, NULL, node, cost_vec)
13290 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13291 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13292 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13293 node, node_instance, cost_vec)
13294 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
13295 NULL, node, cost_vec)
13296 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13297 || vectorizable_condition (vinfo, stmt_info,
13298 NULL, NULL, node, cost_vec)
13299 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13300 cost_vec)
13301 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13302 stmt_info, NULL, node)
13303 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13304 stmt_info, NULL, node, cost_vec)
13305 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13306 cost_vec));
13307 else
13308 {
13309 if (bb_vinfo)
13310 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13311 || vectorizable_simd_clone_call (vinfo, stmt_info,
13312 NULL, NULL, node, cost_vec)
13313 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
13314 cost_vec)
13315 || vectorizable_shift (vinfo, stmt_info,
13316 NULL, NULL, node, cost_vec)
13317 || vectorizable_operation (vinfo, stmt_info,
13318 NULL, NULL, node, cost_vec)
13319 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
13320 cost_vec)
13321 || vectorizable_load (vinfo, stmt_info,
13322 NULL, NULL, node, cost_vec)
13323 || vectorizable_store (vinfo, stmt_info,
13324 NULL, NULL, node, cost_vec)
13325 || vectorizable_condition (vinfo, stmt_info,
13326 NULL, NULL, node, cost_vec)
13327 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13328 cost_vec)
13329 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
13330 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13331 cost_vec));
13332
13333 }
13334
13335 if (node)
13336 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13337
13338 if (!ok)
13339 return opt_result::failure_at (stmt_info->stmt,
13340 "not vectorized:"
13341 " relevant stmt not supported: %G",
13342 stmt_info->stmt);
13343
13344 /* Stmts that are (also) "live" (i.e. used out of the loop)
13345 need extra handling, except for vectorizable reductions. */
13346 if (!bb_vinfo
13347 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
13348 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
13349 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13350 stmt_info, node, node_instance,
13351 false, cost_vec))
13352 return opt_result::failure_at (stmt_info->stmt,
13353 "not vectorized:"
13354 " live stmt not supported: %G",
13355 stmt_info->stmt);
13356
13357 return opt_result::success ();
13358 }
13359
13360
13361 /* Function vect_transform_stmt.
13362
13363 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13364
13365 bool
13366 vect_transform_stmt (vec_info *vinfo,
13367 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13368 slp_tree slp_node, slp_instance slp_node_instance)
13369 {
13370 bool is_store = false;
13371 gimple *vec_stmt = NULL;
13372 bool done;
13373
13374 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13375
13376 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13377 if (slp_node)
13378 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13379
13380 switch (STMT_VINFO_TYPE (stmt_info))
13381 {
13382 case type_demotion_vec_info_type:
13383 case type_promotion_vec_info_type:
13384 case type_conversion_vec_info_type:
13385 done = vectorizable_conversion (vinfo, stmt_info,
13386 gsi, &vec_stmt, slp_node, NULL);
13387 gcc_assert (done);
13388 break;
13389
13390 case induc_vec_info_type:
13391 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13392 stmt_info, &vec_stmt, slp_node,
13393 NULL);
13394 gcc_assert (done);
13395 break;
13396
13397 case shift_vec_info_type:
13398 done = vectorizable_shift (vinfo, stmt_info,
13399 gsi, &vec_stmt, slp_node, NULL);
13400 gcc_assert (done);
13401 break;
13402
13403 case op_vec_info_type:
13404 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13405 NULL);
13406 gcc_assert (done);
13407 break;
13408
13409 case assignment_vec_info_type:
13410 done = vectorizable_assignment (vinfo, stmt_info,
13411 gsi, &vec_stmt, slp_node, NULL);
13412 gcc_assert (done);
13413 break;
13414
13415 case load_vec_info_type:
13416 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13417 NULL);
13418 gcc_assert (done);
13419 break;
13420
13421 case store_vec_info_type:
13422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13423 && !slp_node
13424 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13425 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13426 /* In case of interleaving, the whole chain is vectorized when the
13427 last store in the chain is reached. Store stmts before the last
13428 one are skipped, and their vec_stmt_info shouldn't be freed
13429 meanwhile. */
13430 ;
13431 else
13432 {
13433 done = vectorizable_store (vinfo, stmt_info,
13434 gsi, &vec_stmt, slp_node, NULL);
13435 gcc_assert (done);
13436 is_store = true;
13437 }
13438 break;
13439
13440 case condition_vec_info_type:
13441 done = vectorizable_condition (vinfo, stmt_info,
13442 gsi, &vec_stmt, slp_node, NULL);
13443 gcc_assert (done);
13444 break;
13445
13446 case comparison_vec_info_type:
13447 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13448 slp_node, NULL);
13449 gcc_assert (done);
13450 break;
13451
13452 case call_vec_info_type:
13453 done = vectorizable_call (vinfo, stmt_info,
13454 gsi, &vec_stmt, slp_node, NULL);
13455 break;
13456
13457 case call_simd_clone_vec_info_type:
13458 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13459 slp_node, NULL);
13460 break;
13461
13462 case reduc_vec_info_type:
13463 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13464 gsi, &vec_stmt, slp_node);
13465 gcc_assert (done);
13466 break;
13467
13468 case cycle_phi_info_type:
13469 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13470 &vec_stmt, slp_node, slp_node_instance);
13471 gcc_assert (done);
13472 break;
13473
13474 case lc_phi_info_type:
13475 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13476 stmt_info, &vec_stmt, slp_node);
13477 gcc_assert (done);
13478 break;
13479
13480 case recurr_info_type:
13481 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13482 stmt_info, &vec_stmt, slp_node, NULL);
13483 gcc_assert (done);
13484 break;
13485
13486 case phi_info_type:
13487 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13488 gcc_assert (done);
13489 break;
13490
13491 case loop_exit_ctrl_vec_info_type:
13492 done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
13493 slp_node, NULL);
13494 gcc_assert (done);
13495 break;
13496
13497 default:
13498 if (!STMT_VINFO_LIVE_P (stmt_info))
13499 {
13500 if (dump_enabled_p ())
13501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13502 "stmt not supported.\n");
13503 gcc_unreachable ();
13504 }
13505 done = true;
13506 }
13507
13508 if (!slp_node && vec_stmt)
13509 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13510
13511 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13512 {
13513 /* Handle stmts whose DEF is used outside the loop-nest that is
13514 being vectorized. */
13515 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13516 slp_node_instance, true, NULL);
13517 gcc_assert (done);
13518 }
13519
13520 if (slp_node)
13521 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13522
13523 return is_store;
13524 }
13525
13526
13527 /* Remove a group of stores (for SLP or interleaving), free their
13528 stmt_vec_info. */
13529
13530 void
13531 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13532 {
13533 stmt_vec_info next_stmt_info = first_stmt_info;
13534
13535 while (next_stmt_info)
13536 {
13537 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13538 next_stmt_info = vect_orig_stmt (next_stmt_info);
13539 /* Free the attached stmt_vec_info and remove the stmt. */
13540 vinfo->remove_stmt (next_stmt_info);
13541 next_stmt_info = tmp;
13542 }
13543 }
13544
13545 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13546 elements of type SCALAR_TYPE, or null if the target doesn't support
13547 such a type.
13548
13549 If NUNITS is zero, return a vector type that contains elements of
13550 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13551
13552 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13553 for this vectorization region and want to "autodetect" the best choice.
13554 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13555 and we want the new type to be interoperable with it. PREVAILING_MODE
13556 in this case can be a scalar integer mode or a vector mode; when it
13557 is a vector mode, the function acts like a tree-level version of
13558 related_vector_mode. */
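/* Illustrative sketch (editor's addition, not part of the original source):
   assuming a 128-bit target whose prevailing vector mode is V4SImode, a
   caller could request an interoperable vector of "short" elements with

     tree vectype
       = get_related_vectype_for_scalar_type (V4SImode,
					      short_integer_type_node, 0);

   which, if the target supports it, yields the V8HImode vector type:
   the same 16-byte size with eight HImode lanes.  */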
13559
13560 tree
13561 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13562 tree scalar_type, poly_uint64 nunits)
13563 {
13564 tree orig_scalar_type = scalar_type;
13565 scalar_mode inner_mode;
13566 machine_mode simd_mode;
13567 tree vectype;
13568
13569 if ((!INTEGRAL_TYPE_P (scalar_type)
13570 && !POINTER_TYPE_P (scalar_type)
13571 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13572 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13573 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13574 return NULL_TREE;
13575
13576 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13577
13578 /* Interoperability between modes requires one to be a constant multiple
13579 of the other, so that the number of vectors required for each operation
13580 is a compile-time constant. */
13581 if (prevailing_mode != VOIDmode
13582 && !constant_multiple_p (nunits * nbytes,
13583 GET_MODE_SIZE (prevailing_mode))
13584 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13585 nunits * nbytes))
13586 return NULL_TREE;
13587
13588 /* For vector types of elements whose mode precision doesn't
13589 match their type's precision we use an element type of mode
13590 precision. The vectorization routines will have to make sure
13591 they support the proper result truncation/extension.
13592 We also make sure to build vector types with INTEGER_TYPE
13593 component type only. */
13594 if (INTEGRAL_TYPE_P (scalar_type)
13595 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13596 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13597 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13598 TYPE_UNSIGNED (scalar_type));
13599
13600 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13601 When the component mode passes the above test simply use a type
13602 corresponding to that mode. The theory is that any use that
13603 would cause problems with this will disable vectorization anyway. */
13604 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13605 && !INTEGRAL_TYPE_P (scalar_type))
13606 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13607
13608 /* We can't build a vector type of elements with alignment bigger than
13609 their size. */
13610 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13611 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13612 TYPE_UNSIGNED (scalar_type));
13613
13614 /* If we fell back to using the mode, fail if there was
13615 no scalar type for it. */
13616 if (scalar_type == NULL_TREE)
13617 return NULL_TREE;
13618
13619 /* If no prevailing mode was supplied, use the mode the target prefers.
13620 Otherwise lookup a vector mode based on the prevailing mode. */
13621 if (prevailing_mode == VOIDmode)
13622 {
13623 gcc_assert (known_eq (nunits, 0U));
13624 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13625 if (SCALAR_INT_MODE_P (simd_mode))
13626 {
13627 /* Traditional behavior is not to take the integer mode
13628 literally, but simply to use it as a way of determining
13629 the vector size. It is up to mode_for_vector to decide
13630 what the TYPE_MODE should be.
13631
13632 Note that nunits == 1 is allowed in order to support single
13633 element vector types. */
13634 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13635 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13636 return NULL_TREE;
13637 }
13638 }
13639 else if (SCALAR_INT_MODE_P (prevailing_mode)
13640 || !related_vector_mode (prevailing_mode,
13641 inner_mode, nunits).exists (&simd_mode))
13642 {
13643 /* Fall back to using mode_for_vector, mostly in the hope of being
13644 able to use an integer mode. */
13645 if (known_eq (nunits, 0U)
13646 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13647 return NULL_TREE;
13648
13649 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13650 return NULL_TREE;
13651 }
13652
13653 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13654
13655 /* In cases where the mode was chosen by mode_for_vector, check that
13656 the target actually supports the chosen mode, or that it at least
13657 allows the vector mode to be replaced by a like-sized integer. */
13658 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13659 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13660 return NULL_TREE;
13661
13662 /* Re-attach the address-space qualifier if we canonicalized the scalar
13663 type. */
13664 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13665 return build_qualified_type
13666 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13667
13668 return vectype;
13669 }
13670
13671 /* Function get_vectype_for_scalar_type.
13672
13673 Returns the vector type corresponding to SCALAR_TYPE as supported
13674 by the target. If GROUP_SIZE is nonzero and we're performing BB
13675 vectorization, make sure that the number of elements in the vector
13676 is no bigger than GROUP_SIZE. */
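/* Illustrative sketch (editor's addition, not part of the original source):
   for a BB SLP group of three "int" stores on a hypothetical 128-bit
   target,

     tree vectype = get_vectype_for_scalar_type (vinfo, integer_type_node, 3);

   first considers the natural four-lane choice, sees it has more lanes
   than the group, and retries with the largest power of two not exceeding
   the group size, returning a two-element vector type if the target
   provides one.  */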
13677
13678 tree
13679 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13680 unsigned int group_size)
13681 {
13682 /* For BB vectorization, we should always have a group size once we've
13683 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13684 are tentative requests during things like early data reference
13685 analysis and pattern recognition. */
13686 if (is_a <bb_vec_info> (vinfo))
13687 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13688 else
13689 group_size = 0;
13690
13691 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13692 scalar_type);
13693 if (vectype && vinfo->vector_mode == VOIDmode)
13694 vinfo->vector_mode = TYPE_MODE (vectype);
13695
13696 /* Register the natural choice of vector type, before the group size
13697 has been applied. */
13698 if (vectype)
13699 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13700
13701 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13702 try again with an explicit number of elements. */
13703 if (vectype
13704 && group_size
13705 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13706 {
13707 /* Start with the biggest number of units that fits within
13708 GROUP_SIZE and halve it until we find a valid vector type.
13709 Usually either the first attempt will succeed or all will
13710 fail (in the latter case because GROUP_SIZE is too small
13711 for the target), but it's possible that a target could have
13712 a hole between supported vector types.
13713
13714 If GROUP_SIZE is not a power of 2, this has the effect of
13715 trying the largest power of 2 that fits within the group,
13716 even though the group is not a multiple of that vector size.
13717 The BB vectorizer will then try to carve up the group into
13718 smaller pieces. */
13719 unsigned int nunits = 1 << floor_log2 (group_size);
13720 do
13721 {
13722 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13723 scalar_type, nunits);
13724 nunits /= 2;
13725 }
13726 while (nunits > 1 && !vectype);
13727 }
13728
13729 return vectype;
13730 }
13731
13732 /* Return the vector type corresponding to SCALAR_TYPE as supported
13733 by the target. NODE, if nonnull, is the SLP tree node that will
13734 use the returned vector type. */
13735
13736 tree
13737 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13738 {
13739 unsigned int group_size = 0;
13740 if (node)
13741 group_size = SLP_TREE_LANES (node);
13742 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13743 }
13744
13745 /* Function get_mask_type_for_scalar_type.
13746
13747 Returns the mask type corresponding to a result of comparison
13748 of vectors of specified SCALAR_TYPE as supported by target.
13749 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13750 make sure that the number of elements in the vector is no bigger
13751 than GROUP_SIZE. */
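/* Illustrative sketch (editor's addition, not part of the original source):
   for comparisons of "int" elements,

     tree masktype
       = get_mask_type_for_scalar_type (vinfo, integer_type_node, 0);

   returns truth_type_for of the chosen "int" vector type: on a typical
   128-bit target that is a four-lane boolean vector of integer 0/-1 lanes,
   while targets with separate predicate registers get a narrow predicate
   type instead.  */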
13752
13753 tree
13754 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13755 unsigned int group_size)
13756 {
13757 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13758
13759 if (!vectype)
13760 return NULL;
13761
13762 return truth_type_for (vectype);
13763 }
13764
13765 /* Function get_mask_type_for_scalar_type.
13766
13767 Returns the mask type corresponding to a result of comparison
13768 of vectors of specified SCALAR_TYPE as supported by target.
13769 NODE, if nonnull, is the SLP tree node that will use the returned
13770 vector type. */
13771
13772 tree
13773 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13774 slp_tree node)
13775 {
13776 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13777
13778 if (!vectype)
13779 return NULL;
13780
13781 return truth_type_for (vectype);
13782 }
13783
13784 /* Function get_same_sized_vectype
13785
13786 Returns a vector type corresponding to SCALAR_TYPE of size
13787 VECTOR_TYPE if supported by the target. */
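/* Illustrative sketch (editor's addition, not part of the original source):
   given a 16-byte vector of four "int" elements as VECTOR_TYPE,

     tree hi_vectype
       = get_same_sized_vectype (short_integer_type_node, vector_type);

   requests a same-sized vector of "short", i.e. NUNITS = 16 / 2 = 8 lanes,
   by forwarding to get_related_vectype_for_scalar_type with the mode of
   VECTOR_TYPE as the prevailing mode.  */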
13788
13789 tree
13790 get_same_sized_vectype (tree scalar_type, tree vector_type)
13791 {
13792 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13793 return truth_type_for (vector_type);
13794
13795 poly_uint64 nunits;
13796 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13797 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13798 return NULL_TREE;
13799
13800 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13801 scalar_type, nunits);
13802 }
13803
13804 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13805 would not change the chosen vector modes. */
13806
13807 bool
13808 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13809 {
13810 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13811 i != vinfo->used_vector_modes.end (); ++i)
13812 if (!VECTOR_MODE_P (*i)
13813 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13814 return false;
13815 return true;
13816 }
13817
13818 /* Function vect_is_simple_use.
13819
13820 Input:
13821 VINFO - the vect info of the loop or basic block that is being vectorized.
13822 OPERAND - operand in the loop or bb.
13823 Output:
13824 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13825 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13826 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13827 the definition could be anywhere in the function
13828 DT - the type of definition
13829
13830 Returns whether a stmt with OPERAND can be vectorized.
13831 For loops, supportable operands are constants, loop invariants, and operands
13832 that are defined by the current iteration of the loop. Unsupportable
13833 operands are those that are defined by a previous iteration of the loop (as
13834 is the case in reduction/induction computations).
13835 For basic blocks, supportable operands are constants and bb invariants.
13836 For now, operands defined outside the basic block are not supported. */
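/* Illustrative usage (editor's addition, not part of the original source):
   a typical caller classifies each operand before deciding how to
   vectorize the statement, e.g.

     enum vect_def_type dt;
     stmt_vec_info def_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info))
       return false;   // operand defined in an unsupported way

   and then branches on DT (vect_constant_def, vect_external_def,
   vect_internal_def, ...) to decide how the corresponding vector operand
   is built.  */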
13837
13838 bool
13839 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13840 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13841 {
13842 if (def_stmt_info_out)
13843 *def_stmt_info_out = NULL;
13844 if (def_stmt_out)
13845 *def_stmt_out = NULL;
13846 *dt = vect_unknown_def_type;
13847
13848 if (dump_enabled_p ())
13849 {
13850 dump_printf_loc (MSG_NOTE, vect_location,
13851 "vect_is_simple_use: operand ");
13852 if (TREE_CODE (operand) == SSA_NAME
13853 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13854 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13855 else
13856 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13857 }
13858
13859 if (CONSTANT_CLASS_P (operand))
13860 *dt = vect_constant_def;
13861 else if (is_gimple_min_invariant (operand))
13862 *dt = vect_external_def;
13863 else if (TREE_CODE (operand) != SSA_NAME)
13864 *dt = vect_unknown_def_type;
13865 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13866 *dt = vect_external_def;
13867 else
13868 {
13869 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13870 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13871 if (!stmt_vinfo)
13872 *dt = vect_external_def;
13873 else
13874 {
13875 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13876 def_stmt = stmt_vinfo->stmt;
13877 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13878 if (def_stmt_info_out)
13879 *def_stmt_info_out = stmt_vinfo;
13880 }
13881 if (def_stmt_out)
13882 *def_stmt_out = def_stmt;
13883 }
13884
13885 if (dump_enabled_p ())
13886 {
13887 dump_printf (MSG_NOTE, ", type of def: ");
13888 switch (*dt)
13889 {
13890 case vect_uninitialized_def:
13891 dump_printf (MSG_NOTE, "uninitialized\n");
13892 break;
13893 case vect_constant_def:
13894 dump_printf (MSG_NOTE, "constant\n");
13895 break;
13896 case vect_external_def:
13897 dump_printf (MSG_NOTE, "external\n");
13898 break;
13899 case vect_internal_def:
13900 dump_printf (MSG_NOTE, "internal\n");
13901 break;
13902 case vect_induction_def:
13903 dump_printf (MSG_NOTE, "induction\n");
13904 break;
13905 case vect_reduction_def:
13906 dump_printf (MSG_NOTE, "reduction\n");
13907 break;
13908 case vect_double_reduction_def:
13909 dump_printf (MSG_NOTE, "double reduction\n");
13910 break;
13911 case vect_nested_cycle:
13912 dump_printf (MSG_NOTE, "nested cycle\n");
13913 break;
13914 case vect_first_order_recurrence:
13915 dump_printf (MSG_NOTE, "first order recurrence\n");
13916 break;
13917 case vect_condition_def:
13918 dump_printf (MSG_NOTE, "control flow\n");
13919 break;
13920 case vect_unknown_def_type:
13921 dump_printf (MSG_NOTE, "unknown\n");
13922 break;
13923 }
13924 }
13925
13926 if (*dt == vect_unknown_def_type)
13927 {
13928 if (dump_enabled_p ())
13929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13930 "Unsupported pattern.\n");
13931 return false;
13932 }
13933
13934 return true;
13935 }
13936
13937 /* Function vect_is_simple_use.
13938
13939 Same as vect_is_simple_use but also determines the vector operand
13940 type of OPERAND and stores it to *VECTYPE. If the definition of
13941 OPERAND is vect_uninitialized_def, vect_constant_def or
13942 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
13943 is responsible for computing the best-suited vector type for the
13944 scalar operand. */
13945
13946 bool
13947 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13948 tree *vectype, stmt_vec_info *def_stmt_info_out,
13949 gimple **def_stmt_out)
13950 {
13951 stmt_vec_info def_stmt_info;
13952 gimple *def_stmt;
13953 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13954 return false;
13955
13956 if (def_stmt_out)
13957 *def_stmt_out = def_stmt;
13958 if (def_stmt_info_out)
13959 *def_stmt_info_out = def_stmt_info;
13960
13961 /* Now get a vector type if the def is internal, otherwise supply
13962 NULL_TREE and leave it up to the caller to figure out a proper
13963 type for the use stmt. */
13964 if (*dt == vect_internal_def
13965 || *dt == vect_induction_def
13966 || *dt == vect_reduction_def
13967 || *dt == vect_double_reduction_def
13968 || *dt == vect_nested_cycle
13969 || *dt == vect_first_order_recurrence)
13970 {
13971 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13972 gcc_assert (*vectype != NULL_TREE);
13973 if (dump_enabled_p ())
13974 dump_printf_loc (MSG_NOTE, vect_location,
13975 "vect_is_simple_use: vectype %T\n", *vectype);
13976 }
13977 else if (*dt == vect_uninitialized_def
13978 || *dt == vect_constant_def
13979 || *dt == vect_external_def)
13980 *vectype = NULL_TREE;
13981 else
13982 gcc_unreachable ();
13983
13984 return true;
13985 }
13986
13987 /* Function vect_is_simple_use.
13988
13989 Same as vect_is_simple_use but determines the operand by operand
13990 position OPERAND from either STMT or SLP_NODE, filling in *OP
13991 and *SLP_DEF (when SLP_NODE is not NULL). */
13992
13993 bool
13994 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13995 unsigned operand, tree *op, slp_tree *slp_def,
13996 enum vect_def_type *dt,
13997 tree *vectype, stmt_vec_info *def_stmt_info_out)
13998 {
13999 if (slp_node)
14000 {
14001 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
14002 *slp_def = child;
14003 *vectype = SLP_TREE_VECTYPE (child);
14004 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
14005 {
14006 /* ??? VEC_PERM nodes might be intermediate and their lane values
14007 have no representative (nor do we build a VEC_PERM stmt for
14008 the actual operation). Note for two-operator nodes we set
14009 a representative but leave scalar stmts empty as we'd only
14010 have one for a subset of lanes. Ideally no caller would
14011 require *op for internal defs. */
14012 if (SLP_TREE_REPRESENTATIVE (child))
14013 {
14014 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
14015 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
14016 }
14017 else
14018 {
14019 gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
14020 *op = error_mark_node;
14021 *dt = vect_internal_def;
14022 if (def_stmt_info_out)
14023 *def_stmt_info_out = NULL;
14024 return true;
14025 }
14026 }
14027 else
14028 {
14029 if (def_stmt_info_out)
14030 *def_stmt_info_out = NULL;
14031 *op = SLP_TREE_SCALAR_OPS (child)[0];
14032 *dt = SLP_TREE_DEF_TYPE (child);
14033 return true;
14034 }
14035 }
14036 else
14037 {
14038 *slp_def = NULL;
14039 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
14040 {
14041 if (gimple_assign_rhs_code (ass) == COND_EXPR
14042 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
14043 {
14044 if (operand < 2)
14045 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
14046 else
14047 *op = gimple_op (ass, operand);
14048 }
14049 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
14050 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
14051 else
14052 *op = gimple_op (ass, operand + 1);
14053 }
14054 else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
14055 *op = gimple_op (cond, operand);
14056 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
14057 *op = gimple_call_arg (call, operand);
14058 else
14059 gcc_unreachable ();
14060 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
14061 }
14062 }
14063
14064 /* If OP is not NULL and is external or constant, update its vector
14065 type with VECTYPE. Returns true if successful or false if not,
14066 for example when conflicting vector types are present. */
14067
14068 bool
14069 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
14070 {
14071 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
14072 return true;
14073 if (SLP_TREE_VECTYPE (op))
14074 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
14075 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
14076 should be handled by patterns. Allow vect_constant_def for now. */
14077 if (VECTOR_BOOLEAN_TYPE_P (vectype)
14078 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
14079 return false;
14080 SLP_TREE_VECTYPE (op) = vectype;
14081 return true;
14082 }
14083
14084 /* Function supportable_widening_operation
14085
14086 Check whether an operation represented by the code CODE is a
14087 widening operation that is supported by the target platform in
14088 vector form (i.e., when operating on arguments of type VECTYPE_IN
14089 producing a result of type VECTYPE_OUT).
14090
14091 Widening operations we currently support are NOP (CONVERT), FLOAT,
14092 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14093 are supported by the target platform either directly (via vector
14094 tree-codes), or via target builtins.
14095
14096 Output:
14097 - CODE1 and CODE2 are codes of vector operations to be used when
14098 vectorizing the operation, if available.
14099 - MULTI_STEP_CVT determines the number of required intermediate steps in
14100 case of multi-step conversion (like char->short->int - in that case
14101 MULTI_STEP_CVT will be 1).
14102 - INTERM_TYPES contains the intermediate type required to perform the
14103 widening operation (short in the above example). */
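/* Illustrative sketch (editor's addition, not part of the original source;
   INT_VECTYPE and CHAR_VECTYPE are placeholder names): for a widening
   conversion from a "char" vector to an "int" vector a caller might do

     code_helper code1, code2;
     int steps;
     auto_vec<tree> interm;
     if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					 int_vectype, char_vectype,
					 &code1, &code2, &steps, &interm))
       ...

   and, on success for this char->short->int case, CODE1/CODE2 are the
   VEC_UNPACK_LO/HI codes, STEPS is 1 and INTERM holds the intermediate
   "short" vector type.  */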
14104
14105 bool
14106 supportable_widening_operation (vec_info *vinfo,
14107 code_helper code,
14108 stmt_vec_info stmt_info,
14109 tree vectype_out, tree vectype_in,
14110 code_helper *code1,
14111 code_helper *code2,
14112 int *multi_step_cvt,
14113 vec<tree> *interm_types)
14114 {
14115 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
14116 class loop *vect_loop = NULL;
14117 machine_mode vec_mode;
14118 enum insn_code icode1, icode2;
14119 optab optab1 = unknown_optab, optab2 = unknown_optab;
14120 tree vectype = vectype_in;
14121 tree wide_vectype = vectype_out;
14122 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14123 int i;
14124 tree prev_type, intermediate_type;
14125 machine_mode intermediate_mode, prev_mode;
14126 optab optab3, optab4;
14127
14128 *multi_step_cvt = 0;
14129 if (loop_info)
14130 vect_loop = LOOP_VINFO_LOOP (loop_info);
14131
14132 switch (code.safe_as_tree_code ())
14133 {
14134 case MAX_TREE_CODES:
14135 /* Don't set c1 and c2 if code is not a tree_code. */
14136 break;
14137
14138 case WIDEN_MULT_EXPR:
14139 /* The result of a vectorized widening operation usually requires
14140 two vectors (because the widened results do not fit into one vector).
14141 The generated vector results would normally be expected to be
14142 generated in the same order as in the original scalar computation,
14143 i.e. if 8 results are generated in each vector iteration, they are
14144 to be organized as follows:
14145 vect1: [res1,res2,res3,res4],
14146 vect2: [res5,res6,res7,res8].
14147
14148 However, in the special case that the result of the widening
14149 operation is used in a reduction computation only, the order doesn't
14150 matter (because when vectorizing a reduction we change the order of
14151 the computation). Some targets can take advantage of this and
14152 generate more efficient code. For example, targets like Altivec,
14153 that support widen_mult using a sequence of {mult_even,mult_odd}
14154 generate the following vectors:
14155 vect1: [res1,res3,res5,res7],
14156 vect2: [res2,res4,res6,res8].
14157
14158 When vectorizing outer-loops, we execute the inner-loop sequentially
14159 (each vectorized inner-loop iteration contributes to VF outer-loop
14160 iterations in parallel). We therefore don't allow changing the
14161 order of the computation in the inner-loop during outer-loop
14162 vectorization. */
14163 /* TODO: Another case in which order doesn't *really* matter is when we
14164 widen and then contract again, e.g. (short)((int)x * y >> 8).
14165 Normally, pack_trunc performs an even/odd permute, whereas the
14166 repack from an even/odd expansion would be an interleave, which
14167 would be significantly simpler for e.g. AVX2. */
14168 /* In any case, in order to avoid duplicating the code below, recurse
14169 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14170 are properly set up for the caller. If we fail, we'll continue with
14171 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14172 if (vect_loop
14173 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
14174 && !nested_in_vect_loop_p (vect_loop, stmt_info)
14175 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
14176 stmt_info, vectype_out,
14177 vectype_in, code1,
14178 code2, multi_step_cvt,
14179 interm_types))
14180 {
14181 /* Elements in a vector with the vect_used_by_reduction property cannot
14182 be reordered if the use chain with this property does not have the
14183 same operation. One such example is s += a * b, where elements
14184 in a and b cannot be reordered. Here we check if the vector defined
14185 by STMT is only directly used in the reduction statement. */
14186 tree lhs = gimple_assign_lhs (stmt_info->stmt);
14187 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
14188 if (use_stmt_info
14189 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
14190 return true;
14191 }
14192 c1 = VEC_WIDEN_MULT_LO_EXPR;
14193 c2 = VEC_WIDEN_MULT_HI_EXPR;
14194 break;
14195
14196 case DOT_PROD_EXPR:
14197 c1 = DOT_PROD_EXPR;
14198 c2 = DOT_PROD_EXPR;
14199 break;
14200
14201 case SAD_EXPR:
14202 c1 = SAD_EXPR;
14203 c2 = SAD_EXPR;
14204 break;
14205
14206 case VEC_WIDEN_MULT_EVEN_EXPR:
14207 /* Support the recursion induced just above. */
14208 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14209 c2 = VEC_WIDEN_MULT_ODD_EXPR;
14210 break;
14211
14212 case WIDEN_LSHIFT_EXPR:
14213 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14214 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14215 break;
14216
14217 CASE_CONVERT:
14218 c1 = VEC_UNPACK_LO_EXPR;
14219 c2 = VEC_UNPACK_HI_EXPR;
14220 break;
14221
14222 case FLOAT_EXPR:
14223 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14224 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14225 break;
14226
14227 case FIX_TRUNC_EXPR:
14228 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14229 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14230 break;
14231
14232 default:
14233 gcc_unreachable ();
14234 }
14235
14236 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14237 std::swap (c1, c2);
14238
14239 if (code == FIX_TRUNC_EXPR)
14240 {
14241 /* The signedness is determined from output operand. */
14242 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14243 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14244 }
14245 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14246 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14247 && VECTOR_BOOLEAN_TYPE_P (vectype)
14248 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14249 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14250 {
14251 /* If the input and result modes are the same, a different optab
14252 is needed where we pass in the number of units in vectype. */
14253 optab1 = vec_unpacks_sbool_lo_optab;
14254 optab2 = vec_unpacks_sbool_hi_optab;
14255 }
14256
14257 vec_mode = TYPE_MODE (vectype);
14258 if (widening_fn_p (code))
14259 {
14260 /* If this is an internal fn then we must check whether the target
14261 supports either a low-high split or an even-odd split. */
14262 internal_fn ifn = as_internal_fn ((combined_fn) code);
14263
14264 internal_fn lo, hi, even, odd;
14265 lookup_hilo_internal_fn (ifn, &lo, &hi);
14266 *code1 = as_combined_fn (lo);
14267 *code2 = as_combined_fn (hi);
14268 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14269 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14270
14271 /* If we don't support low-high, then check for even-odd. */
14272 if (!optab1
14273 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14274 || !optab2
14275 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14276 {
14277 lookup_evenodd_internal_fn (ifn, &even, &odd);
14278 *code1 = as_combined_fn (even);
14279 *code2 = as_combined_fn (odd);
14280 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14281 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14282 }
14283 }
14284 else if (code.is_tree_code ())
14285 {
14286 if (code == FIX_TRUNC_EXPR)
14287 {
14288 /* The signedness is determined from output operand. */
14289 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14290 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14291 }
14292 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14293 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14294 && VECTOR_BOOLEAN_TYPE_P (vectype)
14295 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14296 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14297 {
14298 /* If the input and result modes are the same, a different optab
14299 is needed where we pass in the number of units in vectype. */
14300 optab1 = vec_unpacks_sbool_lo_optab;
14301 optab2 = vec_unpacks_sbool_hi_optab;
14302 }
14303 else
14304 {
14305 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14306 optab2 = optab_for_tree_code (c2, vectype, optab_default);
14307 }
14308 *code1 = c1;
14309 *code2 = c2;
14310 }
14311
14312 if (!optab1 || !optab2)
14313 return false;
14314
14315 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14316 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14317 return false;
14318
14319
14320 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14321 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14322 {
14323 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14324 return true;
14325 /* For scalar masks we may have different boolean
14326 vector types having the same QImode. Thus we
14327 add an additional check on the number of elements. */
14328 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14329 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14330 return true;
14331 }
14332
14333 /* Check if it's a multi-step conversion that can be done using intermediate
14334 types. */
14335
14336 prev_type = vectype;
14337 prev_mode = vec_mode;
14338
14339 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14340 return false;
14341
14342 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14343 intermediate steps in the promotion sequence. We try
14344 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
14345 not. */
14346 interm_types->create (MAX_INTERM_CVT_STEPS);
14347 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14348 {
14349 intermediate_mode = insn_data[icode1].operand[0].mode;
14350 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14351 intermediate_type
14352 = vect_halve_mask_nunits (prev_type, intermediate_mode);
14353 else if (VECTOR_MODE_P (intermediate_mode))
14354 {
14355 tree intermediate_element_type
14356 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14357 TYPE_UNSIGNED (prev_type));
14358 intermediate_type
14359 = build_vector_type_for_mode (intermediate_element_type,
14360 intermediate_mode);
14361 }
14362 else
14363 intermediate_type
14364 = lang_hooks.types.type_for_mode (intermediate_mode,
14365 TYPE_UNSIGNED (prev_type));
14366
14367 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14368 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14369 && intermediate_mode == prev_mode
14370 && SCALAR_INT_MODE_P (prev_mode))
14371 {
14372 /* If the input and result modes are the same, a different optab
14373 is needed where we pass in the number of units in vectype. */
14374 optab3 = vec_unpacks_sbool_lo_optab;
14375 optab4 = vec_unpacks_sbool_hi_optab;
14376 }
14377 else
14378 {
14379 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14380 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14381 }
14382
14383 if (!optab3 || !optab4
14384 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14385 || insn_data[icode1].operand[0].mode != intermediate_mode
14386 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14387 || insn_data[icode2].operand[0].mode != intermediate_mode
14388 || ((icode1 = optab_handler (optab3, intermediate_mode))
14389 == CODE_FOR_nothing)
14390 || ((icode2 = optab_handler (optab4, intermediate_mode))
14391 == CODE_FOR_nothing))
14392 break;
14393
14394 interm_types->quick_push (intermediate_type);
14395 (*multi_step_cvt)++;
14396
14397 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14398 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14399 {
14400 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14401 return true;
14402 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14403 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14404 return true;
14405 }
14406
14407 prev_type = intermediate_type;
14408 prev_mode = intermediate_mode;
14409 }
14410
14411 interm_types->release ();
14412 return false;
14413 }
14414
14415
14416 /* Function supportable_narrowing_operation
14417
14418 Check whether an operation represented by the code CODE is a
14419 narrowing operation that is supported by the target platform in
14420 vector form (i.e., when operating on arguments of type VECTYPE_IN
14421 and producing a result of type VECTYPE_OUT).
14422
14423 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14424 and FLOAT. This function checks if these operations are supported by
14425 the target platform directly via vector tree-codes.
14426
14427 Output:
14428 - CODE1 is the code of a vector operation to be used when
14429 vectorizing the operation, if available.
14430 - MULTI_STEP_CVT determines the number of required intermediate steps in
14431 case of multi-step conversion (like int->short->char - in that case
14432 MULTI_STEP_CVT will be 1).
14433 - INTERM_TYPES contains the intermediate type required to perform the
14434 narrowing operation (short in the above example). */
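/* Illustrative sketch (editor's addition, not part of the original source;
   CHAR_VECTYPE and INT_VECTYPE are placeholder names): to narrow an "int"
   vector down to a "char" vector a caller might do

     code_helper code1;
     int steps;
     auto_vec<tree> interm;
     if (supportable_narrowing_operation (NOP_EXPR, char_vectype,
					  int_vectype, &code1, &steps,
					  &interm))
       ...

   and, on success for this int->short->char case, CODE1 is
   VEC_PACK_TRUNC_EXPR, STEPS is 1 and INTERM holds the intermediate
   "short" vector type.  */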
14435
14436 bool
14437 supportable_narrowing_operation (code_helper code,
14438 tree vectype_out, tree vectype_in,
14439 code_helper *code1, int *multi_step_cvt,
14440 vec<tree> *interm_types)
14441 {
14442 machine_mode vec_mode;
14443 enum insn_code icode1;
14444 optab optab1, interm_optab;
14445 tree vectype = vectype_in;
14446 tree narrow_vectype = vectype_out;
14447 enum tree_code c1;
14448 tree intermediate_type, prev_type;
14449 machine_mode intermediate_mode, prev_mode;
14450 int i;
14451 unsigned HOST_WIDE_INT n_elts;
14452 bool uns;
14453
14454 if (!code.is_tree_code ())
14455 return false;
14456
14457 *multi_step_cvt = 0;
14458 switch ((tree_code) code)
14459 {
14460 CASE_CONVERT:
14461 c1 = VEC_PACK_TRUNC_EXPR;
14462 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14463 && VECTOR_BOOLEAN_TYPE_P (vectype)
14464 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14465 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14466 && n_elts < BITS_PER_UNIT)
14467 optab1 = vec_pack_sbool_trunc_optab;
14468 else
14469 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14470 break;
14471
14472 case FIX_TRUNC_EXPR:
14473 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14474 /* The signedness is determined from output operand. */
14475 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14476 break;
14477
14478 case FLOAT_EXPR:
14479 c1 = VEC_PACK_FLOAT_EXPR;
14480 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14481 break;
14482
14483 default:
14484 gcc_unreachable ();
14485 }
14486
14487 if (!optab1)
14488 return false;
14489
14490 vec_mode = TYPE_MODE (vectype);
14491 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14492 return false;
14493
14494 *code1 = c1;
14495
14496 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14497 {
14498 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14499 return true;
14500 /* For scalar masks we may have different boolean
14501 vector types having the same QImode. Thus we
14502 add an additional check on the number of elements. */
14503 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14504 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14505 return true;
14506 }
14507
14508 if (code == FLOAT_EXPR)
14509 return false;
14510
14511 /* Check if it's a multi-step conversion that can be done using intermediate
14512 types. */
14513 prev_mode = vec_mode;
14514 prev_type = vectype;
14515 if (code == FIX_TRUNC_EXPR)
14516 uns = TYPE_UNSIGNED (vectype_out);
14517 else
14518 uns = TYPE_UNSIGNED (vectype);
14519
14520 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14521 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14522 costly than signed. */
14523 if (code == FIX_TRUNC_EXPR && uns)
14524 {
14525 enum insn_code icode2;
14526
14527 intermediate_type
14528 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14529 interm_optab
14530 = optab_for_tree_code (c1, intermediate_type, optab_default);
14531 if (interm_optab != unknown_optab
14532 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14533 && insn_data[icode1].operand[0].mode
14534 == insn_data[icode2].operand[0].mode)
14535 {
14536 uns = false;
14537 optab1 = interm_optab;
14538 icode1 = icode2;
14539 }
14540 }
14541
14542 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14543 intermediate steps in the narrowing sequence. We try
14544 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14545 interm_types->create (MAX_INTERM_CVT_STEPS);
14546 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14547 {
14548 intermediate_mode = insn_data[icode1].operand[0].mode;
14549 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14550 intermediate_type
14551 = vect_double_mask_nunits (prev_type, intermediate_mode);
14552 else
14553 intermediate_type
14554 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14555 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14556 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14557 && SCALAR_INT_MODE_P (prev_mode)
14558 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14559 && n_elts < BITS_PER_UNIT)
14560 interm_optab = vec_pack_sbool_trunc_optab;
14561 else
14562 interm_optab
14563 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14564 optab_default);
14565 if (!interm_optab
14566 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14567 || insn_data[icode1].operand[0].mode != intermediate_mode
14568 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14569 == CODE_FOR_nothing))
14570 break;
14571
14572 interm_types->quick_push (intermediate_type);
14573 (*multi_step_cvt)++;
14574
14575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14576 {
14577 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14578 return true;
14579 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14580 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14581 return true;
14582 }
14583
14584 prev_mode = intermediate_mode;
14585 prev_type = intermediate_type;
14586 optab1 = interm_optab;
14587 }
14588
14589 interm_types->release ();
14590 return false;
14591 }
14592
14593 /* Generate and return a vector mask of MASK_TYPE such that
14594 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14595 Add the statements to SEQ. */
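/* Illustrative example (editor's addition, not part of the original source):
   with a four-lane MASK_TYPE, START_INDEX 6 and END_INDEX 9 the generated
   IFN_WHILE_ULT call produces the mask {1, 1, 1, 0}: lanes 0-2 satisfy
   6 + J < 9 and lane 3 does not.  */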
14596
14597 tree
14598 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14599 tree end_index, const char *name)
14600 {
14601 tree cmp_type = TREE_TYPE (start_index);
14602 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14603 cmp_type, mask_type,
14604 OPTIMIZE_FOR_SPEED));
14605 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14606 start_index, end_index,
14607 build_zero_cst (mask_type));
14608 tree tmp;
14609 if (name)
14610 tmp = make_temp_ssa_name (mask_type, NULL, name);
14611 else
14612 tmp = make_ssa_name (mask_type);
14613 gimple_call_set_lhs (call, tmp);
14614 gimple_seq_add_stmt (seq, call);
14615 return tmp;
14616 }
14617
14618 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14619 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14620
14621 tree
14622 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14623 tree end_index)
14624 {
14625 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14626 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14627 }
14628
14629 /* Try to compute the vector types required to vectorize STMT_INFO,
14630 returning true on success and false if vectorization isn't possible.
14631 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14632 make sure that the number of elements in the vectors is no bigger
14633 than GROUP_SIZE.
14634
14635 On success:
14636
14637 - Set *STMT_VECTYPE_OUT to:
14638 - NULL_TREE if the statement doesn't need to be vectorized;
14639 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14640
14641 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14642 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14643 statement does not help to determine the overall number of units. */
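/* Illustrative example (editor's addition, not part of the original source):
   for a statement like "int_var = (int) short_var" on a hypothetical
   128-bit target, *STMT_VECTYPE_OUT is the four-lane "int" vector type
   while *NUNITS_VECTYPE_OUT is the eight-lane "short" vector type derived
   from the smallest scalar type, so the vectorization factor accounts for
   the narrower elements.  */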
14644
14645 opt_result
14646 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14647 tree *stmt_vectype_out,
14648 tree *nunits_vectype_out,
14649 unsigned int group_size)
14650 {
14651 gimple *stmt = stmt_info->stmt;
14652
14653 /* For BB vectorization, we should always have a group size once we've
14654 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14655 are tentative requests during things like early data reference
14656 analysis and pattern recognition. */
14657 if (is_a <bb_vec_info> (vinfo))
14658 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14659 else
14660 group_size = 0;
14661
14662 *stmt_vectype_out = NULL_TREE;
14663 *nunits_vectype_out = NULL_TREE;
14664
14665 if (gimple_get_lhs (stmt) == NULL_TREE
14666 /* Allow vector conditionals through here. */
14667 && !is_a <gcond *> (stmt)
14668 /* MASK_STORE has no lhs, but is ok. */
14669 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14670 {
14671 if (is_a <gcall *> (stmt))
14672 {
14673 /* Ignore calls with no lhs. These must be calls to
14674 #pragma omp simd functions, and what vectorization factor
14675 it really needs can't be determined until
14676 vectorizable_simd_clone_call. */
14677 if (dump_enabled_p ())
14678 dump_printf_loc (MSG_NOTE, vect_location,
14679 "defer to SIMD clone analysis.\n");
14680 return opt_result::success ();
14681 }
14682
14683 return opt_result::failure_at (stmt,
14684 "not vectorized: irregular stmt: %G", stmt);
14685 }
14686
14687 tree vectype;
14688 tree scalar_type = NULL_TREE;
14689 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14690 {
14691 vectype = STMT_VINFO_VECTYPE (stmt_info);
14692 if (dump_enabled_p ())
14693 dump_printf_loc (MSG_NOTE, vect_location,
14694 "precomputed vectype: %T\n", vectype);
14695 }
14696 else if (vect_use_mask_type_p (stmt_info))
14697 {
14698 unsigned int precision = stmt_info->mask_precision;
14699 scalar_type = build_nonstandard_integer_type (precision, 1);
14700 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14701 if (!vectype)
14702 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14703 " data-type %T\n", scalar_type);
14704 if (dump_enabled_p ())
14705 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14706 }
14707 else
14708 {
14709 /* If we got here with a gcond it means that the target had no available vector
14710 mode for the scalar type. We can't vectorize, so abort. */
14711 if (is_a <gcond *> (stmt))
14712 return opt_result::failure_at (stmt,
14713 "not vectorized:"
14714 " unsupported data-type for gcond %T\n",
14715 scalar_type);
14716
14717 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14718 scalar_type = TREE_TYPE (DR_REF (dr));
14719 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14720 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14721 else
14722 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14723
14724 if (dump_enabled_p ())
14725 {
14726 if (group_size)
14727 dump_printf_loc (MSG_NOTE, vect_location,
14728 "get vectype for scalar type (group size %d):"
14729 " %T\n", group_size, scalar_type);
14730 else
14731 dump_printf_loc (MSG_NOTE, vect_location,
14732 "get vectype for scalar type: %T\n", scalar_type);
14733 }
14734 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14735 if (!vectype)
14736 return opt_result::failure_at (stmt,
14737 "not vectorized:"
14738 " unsupported data-type %T\n",
14739 scalar_type);
14740
14741 if (dump_enabled_p ())
14742 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14743 }
14744
14745 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14746 return opt_result::failure_at (stmt,
14747 "not vectorized: vector stmt in loop:%G",
14748 stmt);
14749
14750 *stmt_vectype_out = vectype;
14751
14752 /* Don't try to compute scalar types if the stmt produces a boolean
14753 vector; use the existing vector type instead. */
14754 tree nunits_vectype = vectype;
14755 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14756 {
14757 /* The number of units is set according to the smallest scalar
14758 type (or the largest vector size, but we only support one
14759 vector size per vectorization). */
14760 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14761 TREE_TYPE (vectype));
14762 if (scalar_type != TREE_TYPE (vectype))
14763 {
14764 if (dump_enabled_p ())
14765 dump_printf_loc (MSG_NOTE, vect_location,
14766 "get vectype for smallest scalar type: %T\n",
14767 scalar_type);
14768 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14769 group_size);
14770 if (!nunits_vectype)
14771 return opt_result::failure_at
14772 (stmt, "not vectorized: unsupported data-type %T\n",
14773 scalar_type);
14774 if (dump_enabled_p ())
14775 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14776 nunits_vectype);
14777 }
14778 }
14779
14780 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14781 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14782 return opt_result::failure_at (stmt,
14783 "Not vectorized: Incompatible number "
14784 "of vector subparts between %T and %T\n",
14785 nunits_vectype, *stmt_vectype_out);
14786
14787 if (dump_enabled_p ())
14788 {
14789 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14790 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14791 dump_printf (MSG_NOTE, "\n");
14792 }
14793
14794 *nunits_vectype_out = nunits_vectype;
14795 return opt_result::success ();
14796 }
14797
14798 /* Generate and return a statement sequence that sets the vector length LEN to:
14799
14800 min_of_start_and_end = min (START_INDEX, END_INDEX);
14801 left_len = END_INDEX - min_of_start_and_end;
14802 rhs = min (left_len, LEN_LIMIT);
14803 LEN = rhs;
14804
14805 Note: the cost of the code generated by this function is modeled
14806 by vect_estimate_min_profitable_iters, so changes here may need
14807 corresponding changes there. */
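/* Worked example (editor's addition, not part of the original source):
   with START_INDEX 12, END_INDEX 17 and LEN_LIMIT 16,
   min_of_start_and_end = min (12, 17) = 12, left_len = 17 - 12 = 5 and
   LEN = min (5, 16) = 5, i.e. five elements remain for the final
   iteration.  */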
14808
14809 gimple_seq
14810 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14811 {
14812 gimple_seq stmts = NULL;
14813 tree len_type = TREE_TYPE (len);
14814 gcc_assert (TREE_TYPE (start_index) == len_type);
14815
14816 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14817 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14818 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14819 gimple* stmt = gimple_build_assign (len, rhs);
14820 gimple_seq_add_stmt (&stmts, stmt);
14821
14822 return stmts;
14823 }
14824