1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
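/* Illustrative sketch (not GCC code): apart from reclassifying
   gather/scatter accesses, the preliminary estimate returned above is
   just the target's per-statement cost scaled by COUNT.  Assuming a
   hypothetical per_stmt_cost supplied by the caller in place of
   builtin_vectorization_cost ():

     static unsigned
     preliminary_estimate (unsigned per_stmt_cost, unsigned count)
     {
       return per_stmt_cost * count;
     }
*/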
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
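/* Example (illustrative only, GIMPLE-like notation): the array-of-vectors
   temporaries above are used for load/store-lanes style accesses, where a
   single internal call fills the whole array and the individual vectors
   are then read back out, roughly:

     vect_array = .LOAD_LANES (MEM_REF [ptr]);    fills all NELEMS vectors
     vectA_1 = vect_array[0];                     read_vector_array
     vectB_2 = vect_array[1];                     read_vector_array
*/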
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
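/* Illustrative sketch (not GCC code): ignoring the pattern-stmt handling,
   the function above is a monotone update - liveness only goes from false
   to true and relevance only increases - and the stmt is re-queued only
   when something actually changed.  Assuming <vector> and hypothetical
   state/worklist representations:

     struct state { int relevant; bool live; };

     static void
     mark_relevant_sketch (std::vector<int> &worklist,
                           std::vector<state> &info,
                           int stmt, int relevant, bool live)
     {
       state old = info[stmt];
       info[stmt].live |= live;
       if (relevant > info[stmt].relevant)
         info[stmt].relevant = relevant;
       if (info[stmt].relevant == old.relevant
           && info[stmt].live == old.live)
         return;
       worklist.push_back (stmt);
     }
*/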
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (other than the loop exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
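/* Example (illustrative only): in the loop below the store to a[i] has a
   vdef and is therefore relevant, while the update of s is only used after
   the loop (through the loop-closed exit PHI) and is therefore live:

     int s = 0;
     for (int i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    alters memory -> relevant
         s += b[i];          used only outside the loop -> live
       }
     return s;
*/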
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it is one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
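/* Example (illustrative only): for the two stores below, the use of i in
   the first only indexes the array and therefore does not by itself force
   the definition of i to be vectorized, whereas the use of x in the second
   is a stored value, i.e. a non-indexing operand:

     a[i] = 0;      i is used only for address computation
     a[i] = x;      x is the stored value, a non-indexing operand
*/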
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
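/* Illustrative sketch (not GCC code): overall this pass is a worklist
   fixed point over use-def chains, seeded with the stmts that are relevant
   by themselves.  With hypothetical helpers initially_relevant_stmts,
   defs_of_uses and propagated_relevance, and the mark_relevant_sketch
   routine sketched after vect_mark_relevant above, the shape is roughly:

     std::vector<int> worklist = initially_relevant_stmts ();
     while (!worklist.empty ())
       {
         int stmt = worklist.back ();
         worklist.pop_back ();
         for (int def : defs_of_uses (stmt))
           mark_relevant_sketch (worklist, info, def,
                                 propagated_relevance (stmt), false);
       }
*/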
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
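/* Worked example (illustrative only): in the non-SLP case the cost recorded
   above reduces to

     inside_cost   = ncopies * C(kind)
     prologue_cost = (number of constant/external operands) * C(scalar_to_vec)

   where C() is the target's per-statement cost; e.g. with ncopies == 2, two
   external operands and unit costs, inside_cost = 2 and prologue_cost = 2.  */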
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
841
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
848 {
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
851
852 for (i = 0; i < pwr + 1; i++)
853 {
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
859 }
860
861 /* FORNOW: Assuming maximum 2 args per stmt. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
866
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
871 }
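/* Worked example (illustrative only): for a two-step promotion (PWR == 1)
   with NCOPIES == 2 the loop above records

     step 0: 2 stmts    (ncopies == 2)
     step 1: 4 stmts    (ncopies doubled to 4)

   i.e. 6 vector stmts in the loop body, plus one prologue stmt per
   constant/external operand.  */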
872
873 /* Returns true if the current function returns DECL. */
874
875 static bool
876 cfun_returns (tree decl)
877 {
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
881 {
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
887 /* We often end up with an aggregate copy to the result decl;
888 handle that case as well. First skip intermediate clobbers
889 though. */
890 gimple *def = ret;
891 do
892 {
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
894 }
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
900 }
901 return false;
902 }
903
904 /* Function vect_model_store_cost
905
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
908
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 dr_alignment_support alignment_support_scheme,
913 int misalignment,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
975 misalignment, &inside_cost, cost_vec);
976
977 if (memory_access_type == VMAT_ELEMENTWISE
978 || memory_access_type == VMAT_STRIDED_SLP)
979 {
980 /* Extracting the elements for the N scalar stores. */
981 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
982 inside_cost += record_stmt_cost (cost_vec,
983 ncopies * assumed_nunits,
984 vec_to_scalar, stmt_info, 0, vect_body);
985 }
986
987 /* When vectorizing a store into the function result assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up with having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
992 if (base
993 && (TREE_CODE (base) == RESULT_DECL
994 || (DECL_P (base) && cfun_returns (base)))
995 && !aggregate_value_p (base, cfun->decl))
996 {
997 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
999 if (REG_P (reg))
1000 {
1001 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1004 if (nregs > 1)
1005 {
1006 /* Spill. */
1007 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1008 vector_store,
1009 stmt_info, 0, vect_epilogue);
1010 /* Loads. */
1011 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1012 scalar_load,
1013 stmt_info, 0, vect_epilogue);
1014 }
1015 }
1016 }
1017
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1022 }
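/* Worked example (illustrative only): for a grouped store with
   DR_GROUP_SIZE == 4 implemented by permute-and-store and NCOPIES == 2,
   the permutation cost recorded above is

     nstmts = ncopies * ceil_log2 (group_size) * group_size
            = 2 * 2 * 4 = 16 vec_perm stmts

   on top of the cost of the vector stores themselves.  */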
1023
1024
1025 /* Calculate cost of DR's memory access. */
1026 void
1027 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1028 dr_alignment_support alignment_support_scheme,
1029 int misalignment,
1030 unsigned int *inside_cost,
1031 stmt_vector_for_cost *body_cost_vec)
1032 {
1033 switch (alignment_support_scheme)
1034 {
1035 case dr_aligned:
1036 {
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 vector_store, stmt_info, 0,
1039 vect_body);
1040
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: aligned.\n");
1044 break;
1045 }
1046
1047 case dr_unaligned_supported:
1048 {
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 unaligned_store, stmt_info,
1052 misalignment, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_store_cost: unaligned supported by "
1056 "hardware.\n");
1057 break;
1058 }
1059
1060 case dr_unaligned_unsupported:
1061 {
1062 *inside_cost = VECT_MAX_COST;
1063
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066 "vect_model_store_cost: unsupported access.\n");
1067 break;
1068 }
1069
1070 default:
1071 gcc_unreachable ();
1072 }
1073 }
1074
1075
1076 /* Function vect_model_load_cost
1077
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1082
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 vect_memory_access_type memory_access_type,
1087 dr_alignment_support alignment_support_scheme,
1088 int misalignment,
1089 gather_scatter_info *gs_info,
1090 slp_tree slp_node,
1091 stmt_vector_for_cost *cost_vec)
1092 {
1093 unsigned int inside_cost = 0, prologue_cost = 0;
1094 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1095
1096 gcc_assert (cost_vec);
1097
1098 /* ??? Somehow we need to fix this at the callers. */
1099 if (slp_node)
1100 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1101
1102 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1103 {
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms, n_loads;
1109 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1110 vf, true, &n_perms, &n_loads);
1111 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1112 first_stmt_info, 0, vect_body);
1113
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 ncopies = n_loads;
1117 }
1118
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info = stmt_info;
1122 if (!slp_node && grouped_access_p)
1123 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p = (first_stmt_info == stmt_info);
1129
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1133 {
1134 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1135 stmt_vec_info next_stmt_info = first_stmt_info;
1136 do
1137 {
1138 gaps -= 1;
1139 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1140 }
1141 while (next_stmt_info);
1142 if (gaps)
1143 {
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: %d unused vectors.\n",
1147 gaps);
1148 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1149 alignment_support_scheme, misalignment, false,
1150 &inside_cost, &prologue_cost,
1151 cost_vec, cost_vec, true);
1152 }
1153 }
1154
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1159 if (first_stmt_p
1160 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1161 {
1162 /* Uses even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size = DR_GROUP_SIZE (first_stmt_info);
1165 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1166 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1167 stmt_info, 0, vect_body);
1168
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE, vect_location,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1172 group_size);
1173 }
1174
1175 /* The loads themselves. */
1176 if (memory_access_type == VMAT_ELEMENTWISE
1177 || memory_access_type == VMAT_GATHER_SCATTER)
1178 {
1179 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1180 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1181 if (memory_access_type == VMAT_GATHER_SCATTER
1182 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1185 the load). */
1186 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1187 vec_to_scalar, stmt_info, 0,
1188 vect_body);
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost += record_stmt_cost (cost_vec,
1191 ncopies * assumed_nunits,
1192 scalar_load, stmt_info, 0, vect_body);
1193 }
1194 else if (memory_access_type == VMAT_INVARIANT)
1195 {
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost += record_stmt_cost (cost_vec, 1,
1198 scalar_load, stmt_info, 0,
1199 vect_prologue);
1200 prologue_cost += record_stmt_cost (cost_vec, 1,
1201 scalar_to_vec, stmt_info, 0,
1202 vect_prologue);
1203 }
1204 else
1205 vect_get_load_cost (vinfo, stmt_info, ncopies,
1206 alignment_support_scheme, misalignment, first_stmt_p,
1207 &inside_cost, &prologue_cost,
1208 cost_vec, cost_vec, true);
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_STRIDED_SLP
1211 || (memory_access_type == VMAT_GATHER_SCATTER
1212 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1213 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 stmt_info, 0, vect_body);
1215
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 }
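/* Worked example (illustrative only): an emulated gather (no target gather
   instruction, i.e. gs_info->ifn == IFN_LAST and no decl) with NCOPIES == 1
   and four elements per vector is costed above as

     4 x vec_to_scalar    (extract the offsets)
     4 x scalar_load      (the element loads)
     1 x vec_construct    (build the vector result).  */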
1221
1222
1223 /* Calculate cost of DR's memory access. */
1224 void
1225 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1226 dr_alignment_support alignment_support_scheme,
1227 int misalignment,
1228 bool add_realign_cost, unsigned int *inside_cost,
1229 unsigned int *prologue_cost,
1230 stmt_vector_for_cost *prologue_cost_vec,
1231 stmt_vector_for_cost *body_cost_vec,
1232 bool record_prologue_costs)
1233 {
1234 switch (alignment_support_scheme)
1235 {
1236 case dr_aligned:
1237 {
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1240
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_model_load_cost: aligned.\n");
1244
1245 break;
1246 }
1247 case dr_unaligned_supported:
1248 {
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1251 unaligned_load, stmt_info,
1252 misalignment, vect_body);
1253
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign:
1262 {
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1267
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1274
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1278
1279 break;
1280 }
1281 case dr_explicit_realign_optimized:
1282 {
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1287
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1294
1295 if (add_realign_cost && record_prologue_costs)
1296 {
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1304 }
1305
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1310
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1315
1316 break;
1317 }
1318
1319 case dr_unaligned_unsupported:
1320 {
1321 *inside_cost = VECT_MAX_COST;
1322
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1327 }
1328
1329 default:
1330 gcc_unreachable ();
1331 }
1332 }
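/* Worked example (illustrative only): with the dr_explicit_realign scheme
   and NCOPIES == 2 the code above records two vector loads and one permute
   per copy, i.e. 4 x vector_load + 2 x vec_perm, plus one vector_stmt for
   the realignment mask if the target provides builtin_mask_for_load.  */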
1333
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1336
1337 static void
1338 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1340 {
1341 if (gsi)
1342 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1343 else
1344 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1345
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE, vect_location,
1348 "created new init_stmt: %G", new_stmt);
1349 }
1350
1351 /* Function vect_init_vector.
1352
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1355 a vector type, a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
1360
1361 tree
1362 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1363 gimple_stmt_iterator *gsi)
1364 {
1365 gimple *init_stmt;
1366 tree new_temp;
1367
1368 /* We abuse this function to push something to an SSA name with initial 'val'. */
1369 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1370 {
1371 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1372 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1373 {
1374 /* Scalar boolean value should be transformed into
1375 all zeros or all ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type))
1377 {
1378 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1379 tree false_val = build_zero_cst (TREE_TYPE (type));
1380
1381 if (CONSTANT_CLASS_P (val))
1382 val = integer_zerop (val) ? false_val : true_val;
1383 else
1384 {
1385 new_temp = make_ssa_name (TREE_TYPE (type));
1386 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1387 val, true_val, false_val);
1388 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1389 val = new_temp;
1390 }
1391 }
1392 else
1393 {
1394 gimple_seq stmts = NULL;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1396 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1397 TREE_TYPE (type), val);
1398 else
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1402 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1403 !gsi_end_p (gsi2); )
1404 {
1405 init_stmt = gsi_stmt (gsi2);
1406 gsi_remove (&gsi2, false);
1407 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1408 }
1409 }
1410 }
1411 val = build_vector_from_val (type, val);
1412 }
1413
1414 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1415 init_stmt = gimple_build_assign (new_temp, val);
1416 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1417 return new_temp;
1418 }
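/* Example (illustrative only): vectorizing a[i] = x + 1 with a
   loop-invariant x leads to an init stmt in the preheader that broadcasts
   the scalar, conceptually

     cst_1 = { x, x, x, x };

   built via build_vector_from_val above; the vectorized loop body then
   uses cst_1 directly.  */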
1419
1420
1421 /* Function vect_get_vec_defs_for_operand.
1422
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1425
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1428
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
1432
1433 void
1434 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1435 unsigned ncopies,
1436 tree op, vec<tree> *vec_oprnds, tree vectype)
1437 {
1438 gimple *def_stmt;
1439 enum vect_def_type dt;
1440 bool is_simple_use;
1441 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1442
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "vect_get_vec_defs_for_operand: %T\n", op);
1446
1447 stmt_vec_info def_stmt_info;
1448 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1449 &def_stmt_info, &def_stmt);
1450 gcc_assert (is_simple_use);
1451 if (def_stmt && dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1453
1454 vec_oprnds->create (ncopies);
1455 if (dt == vect_constant_def || dt == vect_external_def)
1456 {
1457 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1458 tree vector_type;
1459
1460 if (vectype)
1461 vector_type = vectype;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1464 vector_type = truth_type_for (stmt_vectype);
1465 else
1466 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1467
1468 gcc_assert (vector_type);
1469 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1470 while (ncopies--)
1471 vec_oprnds->quick_push (vop);
1472 }
1473 else
1474 {
1475 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1477 for (unsigned i = 0; i < ncopies; ++i)
1478 vec_oprnds->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1480 }
1481 }
1482
1483
1484 /* Get vectorized definitions for OP0 and OP1. */
1485
1486 void
1487 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1488 unsigned ncopies,
1489 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1490 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1491 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1492 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1493 {
1494 if (slp_node)
1495 {
1496 if (op0)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1498 if (op1)
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1500 if (op2)
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1502 if (op3)
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1504 }
1505 else
1506 {
1507 if (op0)
1508 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 op0, vec_oprnds0, vectype0);
1510 if (op1)
1511 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1512 op1, vec_oprnds1, vectype1);
1513 if (op2)
1514 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1515 op2, vec_oprnds2, vectype2);
1516 if (op3)
1517 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1518 op3, vec_oprnds3, vectype3);
1519 }
1520 }
1521
1522 void
1523 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1524 unsigned ncopies,
1525 tree op0, vec<tree> *vec_oprnds0,
1526 tree op1, vec<tree> *vec_oprnds1,
1527 tree op2, vec<tree> *vec_oprnds2,
1528 tree op3, vec<tree> *vec_oprnds3)
1529 {
1530 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1531 op0, vec_oprnds0, NULL_TREE,
1532 op1, vec_oprnds1, NULL_TREE,
1533 op2, vec_oprnds2, NULL_TREE,
1534 op3, vec_oprnds3, NULL_TREE);
1535 }
1536
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
1539 statement and add it to the original statement's EH region if needed. */
1540
1541 static void
1542 vect_finish_stmt_generation_1 (vec_info *,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1547
1548 if (stmt_info)
1549 {
1550 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1551
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1556 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1557 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1558 }
1559 else
1560 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1561 }
1562
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564 which sets the same scalar result as STMT_INFO did. The new statement
1565 replaces STMT_INFO's scalar statement in the instruction stream. */
1566
1567 void
1568 vect_finish_replace_stmt (vec_info *vinfo,
1569 stmt_vec_info stmt_info, gimple *vec_stmt)
1570 {
1571 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1572 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1573
1574 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1575 gsi_replace (&gsi, vec_stmt, true);
1576
1577 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1578 }
1579
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI, updating virtual SSA form if the new stmt is a store. */
1582
1583 void
1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 stmt_vec_info stmt_info, gimple *vec_stmt,
1586 gimple_stmt_iterator *gsi)
1587 {
1588 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1589
1590 if (!gsi_end_p (*gsi)
1591 && gimple_has_mem_ops (vec_stmt))
1592 {
1593 gimple *at_stmt = gsi_stmt (*gsi);
1594 tree vuse = gimple_vuse (at_stmt);
1595 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1596 {
1597 tree vdef = gimple_vdef (at_stmt);
1598 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 gimple_set_modified (vec_stmt, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 && ((is_gimple_assign (vec_stmt)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 || (is_gimple_call (vec_stmt)
1608 && !(gimple_call_flags (vec_stmt)
1609 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1610 {
1611 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 gimple_set_vdef (vec_stmt, new_vdef);
1613 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1614 }
1615 }
1616 }
1617 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1619 }
1620
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1625
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 tree vectype_out, tree vectype_in)
1629 {
1630 internal_fn ifn;
1631 if (internal_fn_p (cfn))
1632 ifn = as_internal_fn (cfn);
1633 else
1634 ifn = associated_internal_fn (fndecl);
1635 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1636 {
1637 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638 if (info.vectorizable)
1639 {
1640 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 OPTIMIZE_FOR_SPEED))
1644 return ifn;
1645 }
1646 }
1647 return IFN_LAST;
1648 }
1649
1650
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 gimple_stmt_iterator *);
1653
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1658
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1666
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1669 types. */
1670
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type,
1674 int group_size,
1675 vect_memory_access_type
1676 memory_access_type,
1677 unsigned int ncopies,
1678 gather_scatter_info *gs_info,
1679 tree scalar_mask)
1680 {
1681 /* Invariant loads need no special support. */
1682 if (memory_access_type == VMAT_INVARIANT)
1683 return;
1684
1685 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1686 machine_mode vecmode = TYPE_MODE (vectype);
1687 bool is_load = (vls_type == VLS_LOAD);
1688 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1689 {
1690 if (is_load
1691 ? !vect_load_lanes_supported (vectype, group_size, true)
1692 : !vect_store_lanes_supported (vectype, group_size, true))
1693 {
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors because"
1697 " the target doesn't have an appropriate"
1698 " load/store-lanes instruction.\n");
1699 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1700 return;
1701 }
1702 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703 return;
1704 }
1705
1706 if (memory_access_type == VMAT_GATHER_SCATTER)
1707 {
1708 internal_fn ifn = (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE);
1711 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 gs_info->memory_type,
1713 gs_info->offset_vectype,
1714 gs_info->scale))
1715 {
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 return;
1723 }
1724 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1725 return;
1726 }
1727
1728 if (memory_access_type != VMAT_CONTIGUOUS
1729 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1730 {
1731 /* Element X of the data must come from iteration i * VF + X of the
1732 scalar loop. We need more work to support other mappings. */
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 "can't operate on partial vectors because an"
1736 " access isn't contiguous.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1738 return;
1739 }
1740
1741 if (!VECTOR_MODE_P (vecmode))
1742 {
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1745 "can't operate on partial vectors when emulating"
1746 " vector operations.\n");
1747 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1748 return;
1749 }
1750
1751 /* We might load more scalars than we need for permuting SLP loads.
1752 We checked in get_group_load_store_type that the extra elements
1753 don't leak into a new vector. */
1754 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1755 {
1756 unsigned int nvectors;
1757 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1758 return nvectors;
1759 gcc_unreachable ();
1760 };
1761
1762 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1763 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1764 machine_mode mask_mode;
1765 bool using_partial_vectors_p = false;
1766 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1767 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1768 {
1769 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1770 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1771 using_partial_vectors_p = true;
1772 }
1773
1774 machine_mode vmode;
1775 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1776 {
1777 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1778 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1779 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1780 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1781 using_partial_vectors_p = true;
1782 }
1783
1784 if (!using_partial_vectors_p)
1785 {
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "can't operate on partial vectors because the"
1789 " target doesn't have the appropriate partial"
1790 " vectorization load or store.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 }
1793 }
1794
1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1799 otherwise return VEC_MASK & LOOP_MASK.
1800
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
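/* For example, when both masks are needed the code below emits a single

       vec_mask_and_N = VEC_MASK & LOOP_MASK;

   statement (the temporary is created with the "vec_mask_and" prefix) and
   returns its LHS, unless an equivalent conjunction is already recorded in
   the loop's vec_cond_masked_set, in which case VEC_MASK is returned
   unchanged.  */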
1803
1804 static tree
1805 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1806 tree vec_mask, gimple_stmt_iterator *gsi)
1807 {
1808 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809 if (!loop_mask)
1810 return vec_mask;
1811
1812 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1813
1814 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1815 return vec_mask;
1816
1817 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1818 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1819 vec_mask, loop_mask);
1820
1821 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1822 return and_res;
1823 }
1824
1825 /* Determine whether we can use a gather load or scatter store to vectorize
1826 strided load or store STMT_INFO by truncating the current offset to a
1827 smaller width. We need to be able to construct an offset vector:
1828
1829 { 0, X, X*2, X*3, ... }
1830
1831 without loss of precision, where X is STMT_INFO's DR_STEP.
1832
1833 Return true if this is possible, describing the gather load or scatter
1834 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
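/* As a purely illustrative example: with 4-byte elements, DR_STEP == 400 and
   at most 1000 latch iterations, the required range is at most
   1000 * 400 == 400000 for SCALE == 1 and 1000 * 100 == 100000 for
   SCALE == 4, both of which fit in a 32-bit offset type.  A gather or
   scatter with a 32-bit offset vector { 0, 400, 800, ... } (scale 1) or
   { 0, 100, 200, ... } (scale 4) can then be used, whichever the target
   supports, even though the untruncated offsets would be computed in a
   wider type.  */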
1835
1836 static bool
1837 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1838 loop_vec_info loop_vinfo, bool masked_p,
1839 gather_scatter_info *gs_info)
1840 {
1841 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1842 data_reference *dr = dr_info->dr;
1843 tree step = DR_STEP (dr);
1844 if (TREE_CODE (step) != INTEGER_CST)
1845 {
1846 /* ??? Perhaps we could use range information here? */
1847 if (dump_enabled_p ())
1848 dump_printf_loc (MSG_NOTE, vect_location,
1849 "cannot truncate variable step.\n");
1850 return false;
1851 }
1852
1853 /* Get the number of bits in an element. */
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1856 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1857
1858 /* Set COUNT to the upper limit on the number of elements - 1.
1859 Start with the maximum vectorization factor. */
1860 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1861
1862 /* Try lowering COUNT to the number of scalar latch iterations. */
1863 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1864 widest_int max_iters;
1865 if (max_loop_iterations (loop, &max_iters)
1866 && max_iters < count)
1867 count = max_iters.to_shwi ();
1868
1869 /* Try scales of 1 and the element size. */
1870 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1871 wi::overflow_type overflow = wi::OVF_NONE;
1872 for (int i = 0; i < 2; ++i)
1873 {
1874 int scale = scales[i];
1875 widest_int factor;
1876 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1877 continue;
1878
1879 /* Determine the minimum precision of COUNT * STEP / SCALE. */

1880 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1881 if (overflow)
1882 continue;
1883 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1884 unsigned int min_offset_bits = wi::min_precision (range, sign);
1885
1886 /* Find the narrowest viable offset type. */
1887 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1888 tree offset_type = build_nonstandard_integer_type (offset_bits,
1889 sign == UNSIGNED);
1890
1891 /* See whether the target supports the operation with an offset
1892 no narrower than OFFSET_TYPE. */
1893 tree memory_type = TREE_TYPE (DR_REF (dr));
1894 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1895 vectype, memory_type, offset_type, scale,
1896 &gs_info->ifn, &gs_info->offset_vectype)
1897 || gs_info->ifn == IFN_LAST)
1898 continue;
1899
1900 gs_info->decl = NULL_TREE;
1901 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1902 but we don't need to store that here. */
1903 gs_info->base = NULL_TREE;
1904 gs_info->element_type = TREE_TYPE (vectype);
1905 gs_info->offset = fold_convert (offset_type, step);
1906 gs_info->offset_dt = vect_constant_def;
1907 gs_info->scale = scale;
1908 gs_info->memory_type = memory_type;
1909 return true;
1910 }
1911
1912 if (overflow && dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location,
1914 "truncating gather/scatter offset to %d bits"
1915 " might change its value.\n", element_bits);
1916
1917 return false;
1918 }
1919
1920 /* Return true if we can use gather/scatter internal functions to
1921 vectorize STMT_INFO, which is a grouped or strided load or store.
1922 MASKED_P is true if the load or store is conditional. When returning
1923 true, fill in GS_INFO with the information required to perform the
1924 operation. */
1925
1926 static bool
1927 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1928 loop_vec_info loop_vinfo, bool masked_p,
1929 gather_scatter_info *gs_info)
1930 {
1931 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1932 || gs_info->ifn == IFN_LAST)
1933 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1934 masked_p, gs_info);
1935
1936 tree old_offset_type = TREE_TYPE (gs_info->offset);
1937 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1938
1939 gcc_assert (TYPE_PRECISION (new_offset_type)
1940 >= TYPE_PRECISION (old_offset_type));
1941 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1942
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_NOTE, vect_location,
1945 "using gather/scatter for strided/grouped access,"
1946 " scale = %d\n", gs_info->scale);
1947
1948 return true;
1949 }
1950
1951 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1952 elements with a known constant step. Return -1 if that step
1953 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1954
1955 static int
1956 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1957 {
1958 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1959 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1960 size_zero_node);
1961 }
1962
1963 /* If the target supports a permute mask that reverses the elements in
1964 a vector of type VECTYPE, return that mask, otherwise return null. */
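/* For a four-element vector the mask selects { 3, 2, 1, 0 }; pushing the
   first three elements below is enough to encode this single stepped
   pattern, which also covers variable-length vectors.  */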
1965
1966 static tree
1967 perm_mask_for_reverse (tree vectype)
1968 {
1969 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1970
1971 /* The encoding has a single stepped pattern. */
1972 vec_perm_builder sel (nunits, 1, 3);
1973 for (int i = 0; i < 3; ++i)
1974 sel.quick_push (nunits - 1 - i);
1975
1976 vec_perm_indices indices (sel, 1, nunits);
1977 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1978 return NULL_TREE;
1979 return vect_gen_perm_mask_checked (vectype, indices);
1980 }
1981
1982 /* A subroutine of get_load_store_type, with a subset of the same
1983 arguments. Handle the case where STMT_INFO is a load or store that
1984 accesses consecutive elements with a negative step. Sets *POFFSET
1985 to the offset to be applied to the DR for the first access. */
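/* For example, for a V4SI access with a negative step *POFFSET becomes
   (-4 + 1) * 4 == -12 bytes, i.e. the vector covers the three elements
   preceding the DR address together with the element at the DR address
   itself.  */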
1986
1987 static vect_memory_access_type
1988 get_negative_load_store_type (vec_info *vinfo,
1989 stmt_vec_info stmt_info, tree vectype,
1990 vec_load_store_type vls_type,
1991 unsigned int ncopies, poly_int64 *poffset)
1992 {
1993 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1994 dr_alignment_support alignment_support_scheme;
1995
1996 if (ncopies > 1)
1997 {
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2000 "multiple types with negative step.\n");
2001 return VMAT_ELEMENTWISE;
2002 }
2003
2004 /* For backward running DRs the first access in VECTYPE is actually
2005 N-1 elements before the address of the DR. */
2006 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2007 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2008
2009 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2010 alignment_support_scheme
2011 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2012 if (alignment_support_scheme != dr_aligned
2013 && alignment_support_scheme != dr_unaligned_supported)
2014 {
2015 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2017 "negative step but alignment required.\n");
2018 *poffset = 0;
2019 return VMAT_ELEMENTWISE;
2020 }
2021
2022 if (vls_type == VLS_STORE_INVARIANT)
2023 {
2024 if (dump_enabled_p ())
2025 dump_printf_loc (MSG_NOTE, vect_location,
2026 "negative step with invariant source;"
2027 " no permute needed.\n");
2028 return VMAT_CONTIGUOUS_DOWN;
2029 }
2030
2031 if (!perm_mask_for_reverse (vectype))
2032 {
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2035 "negative step and reversing not supported.\n");
2036 *poffset = 0;
2037 return VMAT_ELEMENTWISE;
2038 }
2039
2040 return VMAT_CONTIGUOUS_REVERSE;
2041 }
2042
2043 /* STMT_INFO is either a masked or unconditional store. Return the value
2044 being stored. */
2045
2046 tree
2047 vect_get_store_rhs (stmt_vec_info stmt_info)
2048 {
2049 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2050 {
2051 gcc_assert (gimple_assign_single_p (assign));
2052 return gimple_assign_rhs1 (assign);
2053 }
2054 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2055 {
2056 internal_fn ifn = gimple_call_internal_fn (call);
2057 int index = internal_fn_stored_value_index (ifn);
2058 gcc_assert (index >= 0);
2059 return gimple_call_arg (call, index);
2060 }
2061 gcc_unreachable ();
2062 }
2063
2064 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2065
2066 This function returns a vector type which can be composed from NELTS pieces,
2067 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2068 same vector size as the return vector. It first checks whether the target
2069 supports a pieces-size vector mode for the construction; if not, it checks
2070 whether a pieces-size scalar mode can be used instead. It returns NULL_TREE
2071 if it fails to find an available composition.
2072
2073 For example, for (vtype=V16QI, nelts=4), we can probably get:
2074 - V16QI with PTYPE V4QI.
2075 - V4SI with PTYPE SI.
2076 - NULL_TREE. */
2077
2078 static tree
2079 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2080 {
2081 gcc_assert (VECTOR_TYPE_P (vtype));
2082 gcc_assert (known_gt (nelts, 0U));
2083
2084 machine_mode vmode = TYPE_MODE (vtype);
2085 if (!VECTOR_MODE_P (vmode))
2086 return NULL_TREE;
2087
2088 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2089 unsigned int pbsize;
2090 if (constant_multiple_p (vbsize, nelts, &pbsize))
2091 {
2092 /* First check if vec_init optab supports construction from
2093 vector pieces directly. */
2094 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2095 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2096 machine_mode rmode;
2097 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2098 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2099 != CODE_FOR_nothing))
2100 {
2101 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2102 return vtype;
2103 }
2104
2105 /* Otherwise check whether an integer type of the same piece size exists
2106 and whether the vec_init optab supports construction from it directly. */
2107 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2108 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2109 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2110 != CODE_FOR_nothing))
2111 {
2112 *ptype = build_nonstandard_integer_type (pbsize, 1);
2113 return build_vector_type (*ptype, nelts);
2114 }
2115 }
2116
2117 return NULL_TREE;
2118 }
2119
2120 /* A subroutine of get_load_store_type, with a subset of the same
2121 arguments. Handle the case where STMT_INFO is part of a grouped load
2122 or store.
2123
2124 For stores, the statements in the group are all consecutive
2125 and there is no gap at the end. For loads, the statements in the
2126 group might not be consecutive; there can be gaps between statements
2127 as well as at the end. */
2128
2129 static bool
2130 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2131 tree vectype, slp_tree slp_node,
2132 bool masked_p, vec_load_store_type vls_type,
2133 vect_memory_access_type *memory_access_type,
2134 poly_int64 *poffset,
2135 dr_alignment_support *alignment_support_scheme,
2136 int *misalignment,
2137 gather_scatter_info *gs_info)
2138 {
2139 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2140 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2148
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2152
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2159
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2163
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2166
2167 if (slp_node)
2168 {
2169 /* For SLP vectorization we directly vectorize a subchain
2170 without permutation. */
2171 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2172 first_dr_info
2173 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2174 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2175 {
2176 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2177 separated by the stride, until we have a complete vector.
2178 Fall back to scalar accesses if that isn't possible. */
2179 if (multiple_p (nunits, group_size))
2180 *memory_access_type = VMAT_STRIDED_SLP;
2181 else
2182 *memory_access_type = VMAT_ELEMENTWISE;
2183 }
2184 else
2185 {
2186 overrun_p = loop_vinfo && gap != 0;
2187 if (overrun_p && vls_type != VLS_LOAD)
2188 {
2189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190 "Grouped store with gaps requires"
2191 " non-consecutive accesses\n");
2192 return false;
2193 }
2194 /* An overrun is fine if the trailing elements are smaller
2195 than the alignment boundary B. Every vector access will
2196 be a multiple of B and so we are guaranteed to access a
2197 non-gap element in the same B-sized block. */
2198 if (overrun_p
2199 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2200 vectype)
2201 / vect_get_scalar_dr_size (first_dr_info)))
2202 overrun_p = false;
2203
2204 /* If the gap splits the vector in half and the target
2205 can do half-vector operations, avoid the epilogue peeling
2206 by simply loading half of the vector only. Usually
2207 the construction with an upper zero half will be elided. */
2208 dr_alignment_support alss;
2209 int misalign = dr_misalignment (first_dr_info, vectype);
2210 tree half_vtype;
2211 if (overrun_p
2212 && !masked_p
2213 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2214 vectype, misalign)))
2215 == dr_aligned
2216 || alss == dr_unaligned_supported)
2217 && known_eq (nunits, (group_size - gap) * 2)
2218 && known_eq (nunits, group_size)
2219 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2220 != NULL_TREE))
2221 overrun_p = false;
2222
2223 if (overrun_p && !can_overrun_p)
2224 {
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2227 "Peeling for outer loop is not supported\n");
2228 return false;
2229 }
2230 int cmp = compare_step_with_zero (vinfo, stmt_info);
2231 if (cmp < 0)
2232 {
2233 if (single_element_p)
2234 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2235 only correct for single element "interleaving" SLP. */
2236 *memory_access_type = get_negative_load_store_type
2237 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2238 else
2239 {
2240 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2241 separated by the stride, until we have a complete vector.
2242 Fall back to scalar accesses if that isn't possible. */
2243 if (multiple_p (nunits, group_size))
2244 *memory_access_type = VMAT_STRIDED_SLP;
2245 else
2246 *memory_access_type = VMAT_ELEMENTWISE;
2247 }
2248 }
2249 else
2250 {
2251 gcc_assert (!loop_vinfo || cmp > 0);
2252 *memory_access_type = VMAT_CONTIGUOUS;
2253 }
2254 }
2255 }
2256 else
2257 {
2258 /* We can always handle this case using elementwise accesses,
2259 but see if something more efficient is available. */
2260 *memory_access_type = VMAT_ELEMENTWISE;
2261
2262 /* If there is a gap at the end of the group then these optimizations
2263 would access excess elements in the last iteration. */
2264 bool would_overrun_p = (gap != 0);
2265 /* An overrun is fine if the trailing elements are smaller than the
2266 alignment boundary B. Every vector access will be a multiple of B
2267 and so we are guaranteed to access a non-gap element in the
2268 same B-sized block. */
2269 if (would_overrun_p
2270 && !masked_p
2271 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2272 / vect_get_scalar_dr_size (first_dr_info)))
2273 would_overrun_p = false;
2274
2275 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2276 && (can_overrun_p || !would_overrun_p)
2277 && compare_step_with_zero (vinfo, stmt_info) > 0)
2278 {
2279 /* First cope with the degenerate case of a single-element
2280 vector. */
2281 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2282 ;
2283
2284 /* Otherwise try using LOAD/STORE_LANES. */
2285 else if (vls_type == VLS_LOAD
2286 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2287 : vect_store_lanes_supported (vectype, group_size,
2288 masked_p))
2289 {
2290 *memory_access_type = VMAT_LOAD_STORE_LANES;
2291 overrun_p = would_overrun_p;
2292 }
2293
2294 /* If that fails, try using permuting loads. */
2295 else if (vls_type == VLS_LOAD
2296 ? vect_grouped_load_supported (vectype, single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2299 {
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2302 }
2303 }
2304
2305 /* As a last resort, try using a gather load or scatter store.
2306
2307 ??? Although the code can handle all group sizes correctly,
2308 it probably isn't a win to use separate strided accesses based
2309 on nearby locations. Or, even if it's a win over scalar code,
2310 it might not be a win over vectorizing at a lower VF, if that
2311 allows us to use contiguous accesses. */
2312 if (*memory_access_type == VMAT_ELEMENTWISE
2313 && single_element_p
2314 && loop_vinfo
2315 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2316 masked_p, gs_info))
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2318 }
2319
2320 if (*memory_access_type == VMAT_GATHER_SCATTER
2321 || *memory_access_type == VMAT_ELEMENTWISE)
2322 {
2323 *alignment_support_scheme = dr_unaligned_supported;
2324 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2325 }
2326 else
2327 {
2328 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2329 *alignment_support_scheme
2330 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2331 *misalignment);
2332 }
2333
2334 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2335 {
2336 /* STMT is the leader of the group. Check the operands of all the
2337 stmts of the group. */
2338 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2339 while (next_stmt_info)
2340 {
2341 tree op = vect_get_store_rhs (next_stmt_info);
2342 enum vect_def_type dt;
2343 if (!vect_is_simple_use (op, vinfo, &dt))
2344 {
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "use not simple.\n");
2348 return false;
2349 }
2350 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2351 }
2352 }
2353
2354 if (overrun_p)
2355 {
2356 gcc_assert (can_overrun_p);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "Data access with gaps requires scalar "
2360 "epilogue loop\n");
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2362 }
2363
2364 return true;
2365 }
2366
2367 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2368 if there is a memory access type that the vectorized form can use,
2369 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2370 or scatters, fill in GS_INFO accordingly. In addition
2371 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2372 the target does not support the alignment scheme. *MISALIGNMENT
2373 is set according to the alignment of the access (including
2374 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2375
2376 SLP says whether we're performing SLP rather than loop vectorization.
2377 MASKED_P is true if the statement is conditional on a vectorized mask.
2378 VECTYPE is the vector type that the vectorized statements will use.
2379 NCOPIES is the number of vector statements that will be needed. */
2380
2381 static bool
2382 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2383 tree vectype, slp_tree slp_node,
2384 bool masked_p, vec_load_store_type vls_type,
2385 unsigned int ncopies,
2386 vect_memory_access_type *memory_access_type,
2387 poly_int64 *poffset,
2388 dr_alignment_support *alignment_support_scheme,
2389 int *misalignment,
2390 gather_scatter_info *gs_info)
2391 {
2392 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2393 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2394 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2395 *poffset = 0;
2396 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2397 {
2398 *memory_access_type = VMAT_GATHER_SCATTER;
2399 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2400 gcc_unreachable ();
2401 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2402 &gs_info->offset_dt,
2403 &gs_info->offset_vectype))
2404 {
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "%s index use not simple.\n",
2408 vls_type == VLS_LOAD ? "gather" : "scatter");
2409 return false;
2410 }
2411 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2412 {
2413 if (vls_type != VLS_LOAD)
2414 {
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 "unsupported emulated scatter.\n");
2418 return false;
2419 }
2420 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2421 || !TYPE_VECTOR_SUBPARTS
2422 (gs_info->offset_vectype).is_constant ()
2423 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2424 (gs_info->offset_vectype),
2425 TYPE_VECTOR_SUBPARTS (vectype)))
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "unsupported vector types for emulated "
2430 "gather.\n");
2431 return false;
2432 }
2433 }
2434 /* Gather-scatter accesses perform only component accesses, alignment
2435 is irrelevant for them. */
2436 *alignment_support_scheme = dr_unaligned_supported;
2437 }
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439 {
2440 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2441 masked_p,
2442 vls_type, memory_access_type, poffset,
2443 alignment_support_scheme,
2444 misalignment, gs_info))
2445 return false;
2446 }
2447 else if (STMT_VINFO_STRIDED_P (stmt_info))
2448 {
2449 gcc_assert (!slp_node);
2450 if (loop_vinfo
2451 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2452 masked_p, gs_info))
2453 *memory_access_type = VMAT_GATHER_SCATTER;
2454 else
2455 *memory_access_type = VMAT_ELEMENTWISE;
2456 /* Alignment is irrelevant here. */
2457 *alignment_support_scheme = dr_unaligned_supported;
2458 }
2459 else
2460 {
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp == 0)
2463 {
2464 gcc_assert (vls_type == VLS_LOAD);
2465 *memory_access_type = VMAT_INVARIANT;
2466 /* Invariant accesses perform only component accesses, alignment
2467 is irrelevant for them. */
2468 *alignment_support_scheme = dr_unaligned_supported;
2469 }
2470 else
2471 {
2472 if (cmp < 0)
2473 *memory_access_type = get_negative_load_store_type
2474 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2475 else
2476 *memory_access_type = VMAT_CONTIGUOUS;
2477 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2478 vectype, *poffset);
2479 *alignment_support_scheme
2480 = vect_supportable_dr_alignment (vinfo,
2481 STMT_VINFO_DR_INFO (stmt_info),
2482 vectype, *misalignment);
2483 }
2484 }
2485
2486 if ((*memory_access_type == VMAT_ELEMENTWISE
2487 || *memory_access_type == VMAT_STRIDED_SLP)
2488 && !nunits.is_constant ())
2489 {
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2494 return false;
2495 }
2496
2497 if (*alignment_support_scheme == dr_unaligned_unsupported)
2498 {
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "unsupported unaligned access\n");
2502 return false;
2503 }
2504
2505 /* FIXME: At the moment the cost model seems to underestimate the
2506 cost of using elementwise accesses. This check preserves the
2507 traditional behavior until that can be fixed. */
2508 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2509 if (!first_stmt_info)
2510 first_stmt_info = stmt_info;
2511 if (*memory_access_type == VMAT_ELEMENTWISE
2512 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2513 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2514 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2515 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "not falling back to elementwise accesses\n");
2520 return false;
2521 }
2522 return true;
2523 }
2524
2525 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2526 conditional operation STMT_INFO. When returning true, store the mask
2527 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2528 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2529 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2530
2531 static bool
2532 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2533 slp_tree slp_node, unsigned mask_index,
2534 tree *mask, slp_tree *mask_node,
2535 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2536 {
2537 enum vect_def_type mask_dt;
2538 tree mask_vectype;
2539 slp_tree mask_node_1;
2540 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2541 mask, &mask_node_1, &mask_dt, &mask_vectype))
2542 {
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "mask use not simple.\n");
2546 return false;
2547 }
2548
2549 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2550 {
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "mask argument is not a boolean.\n");
2554 return false;
2555 }
2556
2557 /* If the caller is not prepared for adjusting an external/constant
2558 SLP mask vector type fail. */
2559 if (slp_node
2560 SLP mask vector type, fail. */
2561 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2562 {
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 "SLP mask argument is not vectorized.\n");
2566 return false;
2567 }
2568
2569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2570 if (!mask_vectype)
2571 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2572
2573 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2574 {
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "could not find an appropriate vector mask type.\n");
2578 return false;
2579 }
2580
2581 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2582 TYPE_VECTOR_SUBPARTS (vectype)))
2583 {
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "vector mask type %T"
2587 " does not match vector data type %T.\n",
2588 mask_vectype, vectype);
2589
2590 return false;
2591 }
2592
2593 *mask_dt_out = mask_dt;
2594 *mask_vectype_out = mask_vectype;
2595 if (mask_node)
2596 *mask_node = mask_node_1;
2597 return true;
2598 }
2599
2600 /* Return true if stored value RHS is suitable for vectorizing store
2601 statement STMT_INFO. When returning true, store the type of the
2602 definition in *RHS_DT_OUT, the type of the vectorized store value in
2603 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2604
2605 static bool
2606 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2607 slp_tree slp_node, tree rhs,
2608 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2609 vec_load_store_type *vls_type_out)
2610 {
2611 /* In the case this is a store from a constant, make sure
2612 native_encode_expr can handle it. */
2613 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2614 {
2615 if (dump_enabled_p ())
2616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2617 "cannot encode constant as a byte sequence.\n");
2618 return false;
2619 }
2620
2621 unsigned op_no = 0;
2622 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2623 {
2624 if (gimple_call_internal_p (call)
2625 && internal_store_fn_p (gimple_call_internal_fn (call)))
2626 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2627 }
2628
2629 enum vect_def_type rhs_dt;
2630 tree rhs_vectype;
2631 slp_tree slp_op;
2632 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2633 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2634 {
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 "use not simple.\n");
2638 return false;
2639 }
2640
2641 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2642 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2643 {
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2646 "incompatible vector types.\n");
2647 return false;
2648 }
2649
2650 *rhs_dt_out = rhs_dt;
2651 *rhs_vectype_out = rhs_vectype;
2652 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2653 *vls_type_out = VLS_STORE_INVARIANT;
2654 else
2655 *vls_type_out = VLS_STORE;
2656 return true;
2657 }
2658
2659 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2660 Note that we support masks with floating-point type, in which case the
2661 floats are interpreted as a bitmask. */
2662
2663 static tree
2664 vect_build_all_ones_mask (vec_info *vinfo,
2665 stmt_vec_info stmt_info, tree masktype)
2666 {
2667 if (TREE_CODE (masktype) == INTEGER_TYPE)
2668 return build_int_cst (masktype, -1);
2669 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2670 {
2671 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2672 mask = build_vector_from_val (masktype, mask);
2673 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2674 }
2675 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2676 {
2677 REAL_VALUE_TYPE r;
2678 long tmp[6];
2679 for (int j = 0; j < 6; ++j)
2680 tmp[j] = -1;
2681 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2682 tree mask = build_real (TREE_TYPE (masktype), r);
2683 mask = build_vector_from_val (masktype, mask);
2684 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2685 }
2686 gcc_unreachable ();
2687 }
2688
2689 /* Build an all-zero merge value of type VECTYPE while vectorizing
2690 STMT_INFO as a gather load. */
2691
2692 static tree
2693 vect_build_zero_merge_argument (vec_info *vinfo,
2694 stmt_vec_info stmt_info, tree vectype)
2695 {
2696 tree merge;
2697 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2698 merge = build_int_cst (TREE_TYPE (vectype), 0);
2699 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2700 {
2701 REAL_VALUE_TYPE r;
2702 long tmp[6];
2703 for (int j = 0; j < 6; ++j)
2704 tmp[j] = 0;
2705 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2706 merge = build_real (TREE_TYPE (vectype), r);
2707 }
2708 else
2709 gcc_unreachable ();
2710 merge = build_vector_from_val (vectype, merge);
2711 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2712 }
2713
2714 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2715 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2716 the gather load operation. If the load is conditional, MASK is the
2717 unvectorized condition and MASK_DT is its definition type, otherwise
2718 MASK is null. */
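/* The code below expects GS_INFO->DECL to be a target builtin taking five
   arguments (SRC, PTR, OFFSET, MASK, SCALE), whose types are recovered from
   the decl's TYPE_ARG_TYPES, and emits calls of the form

       tmp = BUILTIN (merge_src, base_ptr, offset_vec, mask, scale);

   wrapping the operands and the result in VIEW_CONVERT_EXPRs where the
   builtin's types differ from the vector types used by the vectorizer.  */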
2719
2720 static void
2721 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2722 gimple_stmt_iterator *gsi,
2723 gimple **vec_stmt,
2724 gather_scatter_info *gs_info,
2725 tree mask)
2726 {
2727 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2728 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2730 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2731 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2732 edge pe = loop_preheader_edge (loop);
2733 enum { NARROW, NONE, WIDEN } modifier;
2734 poly_uint64 gather_off_nunits
2735 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2736
2737 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2738 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2739 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2740 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2741 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2742 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2743 tree scaletype = TREE_VALUE (arglist);
2744 tree real_masktype = masktype;
2745 gcc_checking_assert (types_compatible_p (srctype, rettype)
2746 && (!mask
2747 || TREE_CODE (masktype) == INTEGER_TYPE
2748 || types_compatible_p (srctype, masktype)));
2749 if (mask)
2750 masktype = truth_type_for (srctype);
2751
2752 tree mask_halftype = masktype;
2753 tree perm_mask = NULL_TREE;
2754 tree mask_perm_mask = NULL_TREE;
2755 if (known_eq (nunits, gather_off_nunits))
2756 modifier = NONE;
2757 else if (known_eq (nunits * 2, gather_off_nunits))
2758 {
2759 modifier = WIDEN;
2760
2761 /* Currently widening gathers and scatters are only supported for
2762 fixed-length vectors. */
2763 int count = gather_off_nunits.to_constant ();
2764 vec_perm_builder sel (count, count, 1);
2765 for (int i = 0; i < count; ++i)
2766 sel.quick_push (i | (count / 2));
2767
2768 vec_perm_indices indices (sel, 1, count);
2769 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2770 indices);
2771 }
2772 else if (known_eq (nunits, gather_off_nunits * 2))
2773 {
2774 modifier = NARROW;
2775
2776 /* Currently narrowing gathers and scatters are only supported for
2777 fixed-length vectors. */
2778 int count = nunits.to_constant ();
2779 vec_perm_builder sel (count, count, 1);
2780 sel.quick_grow (count);
2781 for (int i = 0; i < count; ++i)
2782 sel[i] = i < count / 2 ? i : i + count / 2;
2783 vec_perm_indices indices (sel, 2, count);
2784 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2785
2786 ncopies *= 2;
2787
2788 if (mask && VECTOR_TYPE_P (real_masktype))
2789 {
2790 for (int i = 0; i < count; ++i)
2791 sel[i] = i | (count / 2);
2792 indices.new_vector (sel, 2, count);
2793 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2794 }
2795 else if (mask)
2796 mask_halftype = truth_type_for (gs_info->offset_vectype);
2797 }
2798 else
2799 gcc_unreachable ();
2800
2801 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2802 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803
2804 tree ptr = fold_convert (ptrtype, gs_info->base);
2805 if (!is_gimple_min_invariant (ptr))
2806 {
2807 gimple_seq seq;
2808 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2809 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2810 gcc_assert (!new_bb);
2811 }
2812
2813 tree scale = build_int_cst (scaletype, gs_info->scale);
2814
2815 tree vec_oprnd0 = NULL_TREE;
2816 tree vec_mask = NULL_TREE;
2817 tree src_op = NULL_TREE;
2818 tree mask_op = NULL_TREE;
2819 tree prev_res = NULL_TREE;
2820
2821 if (!mask)
2822 {
2823 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2824 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2825 }
2826
2827 auto_vec<tree> vec_oprnds0;
2828 auto_vec<tree> vec_masks;
2829 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2830 modifier == WIDEN ? ncopies / 2 : ncopies,
2831 gs_info->offset, &vec_oprnds0);
2832 if (mask)
2833 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2834 modifier == NARROW ? ncopies / 2 : ncopies,
2835 mask, &vec_masks, masktype);
2836 for (int j = 0; j < ncopies; ++j)
2837 {
2838 tree op, var;
2839 if (modifier == WIDEN && (j & 1))
2840 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2841 perm_mask, stmt_info, gsi);
2842 else
2843 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2844
2845 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2846 {
2847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2848 TYPE_VECTOR_SUBPARTS (idxtype)));
2849 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2850 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2851 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2852 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2853 op = var;
2854 }
2855
2856 if (mask)
2857 {
2858 if (mask_perm_mask && (j & 1))
2859 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2860 mask_perm_mask, stmt_info, gsi);
2861 else
2862 {
2863 if (modifier == NARROW)
2864 {
2865 if ((j & 1) == 0)
2866 vec_mask = vec_masks[j / 2];
2867 }
2868 else
2869 vec_mask = vec_masks[j];
2870
2871 mask_op = vec_mask;
2872 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2873 {
2874 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2875 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2876 gcc_assert (known_eq (sub1, sub2));
2877 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2878 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2879 gassign *new_stmt
2880 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 mask_op = var;
2883 }
2884 }
2885 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2886 {
2887 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2888 gassign *new_stmt
2889 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2890 : VEC_UNPACK_LO_EXPR,
2891 mask_op);
2892 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2893 mask_op = var;
2894 }
2895 src_op = mask_op;
2896 }
2897
2898 tree mask_arg = mask_op;
2899 if (masktype != real_masktype)
2900 {
2901 tree utype, optype = TREE_TYPE (mask_op);
2902 if (VECTOR_TYPE_P (real_masktype)
2903 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2904 utype = real_masktype;
2905 else
2906 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2907 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2908 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2909 gassign *new_stmt
2910 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2911 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2912 mask_arg = var;
2913 if (!useless_type_conversion_p (real_masktype, utype))
2914 {
2915 gcc_assert (TYPE_PRECISION (utype)
2916 <= TYPE_PRECISION (real_masktype));
2917 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2918 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2919 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2920 mask_arg = var;
2921 }
2922 src_op = build_zero_cst (srctype);
2923 }
2924 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2925 mask_arg, scale);
2926
2927 if (!useless_type_conversion_p (vectype, rettype))
2928 {
2929 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2930 TYPE_VECTOR_SUBPARTS (rettype)));
2931 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2932 gimple_call_set_lhs (new_stmt, op);
2933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2934 var = make_ssa_name (vec_dest);
2935 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2936 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2938 }
2939 else
2940 {
2941 var = make_ssa_name (vec_dest, new_stmt);
2942 gimple_call_set_lhs (new_stmt, var);
2943 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2944 }
2945
2946 if (modifier == NARROW)
2947 {
2948 if ((j & 1) == 0)
2949 {
2950 prev_res = var;
2951 continue;
2952 }
2953 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2954 stmt_info, gsi);
2955 new_stmt = SSA_NAME_DEF_STMT (var);
2956 }
2957
2958 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2959 }
2960 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2961 }
2962
2963 /* Prepare the base and offset in GS_INFO for vectorization.
2964 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2965 to the vectorized offset argument for the first copy of STMT_INFO.
2966 STMT_INFO is the statement described by GS_INFO and LOOP is the
2967 containing loop. */
2968
2969 static void
2970 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2971 class loop *loop, stmt_vec_info stmt_info,
2972 slp_tree slp_node, gather_scatter_info *gs_info,
2973 tree *dataref_ptr, vec<tree> *vec_offset)
2974 {
2975 gimple_seq stmts = NULL;
2976 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2977 if (stmts != NULL)
2978 {
2979 basic_block new_bb;
2980 edge pe = loop_preheader_edge (loop);
2981 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2982 gcc_assert (!new_bb);
2983 }
2984 if (slp_node)
2985 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2986 else
2987 {
2988 unsigned ncopies
2989 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2990 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2991 gs_info->offset, vec_offset,
2992 gs_info->offset_vectype);
2993 }
2994 }
2995
2996 /* Prepare to implement a grouped or strided load or store using
2997 the gather load or scatter store operation described by GS_INFO.
2998 STMT_INFO is the load or store statement.
2999
3000 Set *DATAREF_BUMP to the amount that should be added to the base
3001 address after each copy of the vectorized statement. Set *VEC_OFFSET
3002 to an invariant offset vector in which element I has the value
3003 I * DR_STEP / SCALE. */
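/* As an illustration: with VECTYPE V4SI, DR_STEP == 16 and SCALE == 4,
   *DATAREF_BUMP is 16 * 4 == 64 bytes per copy and *VEC_OFFSET is the
   invariant series { 0, 4, 8, 12 }.  */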
3004
3005 static void
3006 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3007 loop_vec_info loop_vinfo,
3008 gather_scatter_info *gs_info,
3009 tree *dataref_bump, tree *vec_offset)
3010 {
3011 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3013
3014 tree bump = size_binop (MULT_EXPR,
3015 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3016 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3017 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3018
3019 /* The offset given in GS_INFO can have pointer type, so use the element
3020 type of the vector instead. */
3021 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3022
3023 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3024 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3025 ssize_int (gs_info->scale));
3026 step = fold_convert (offset_type, step);
3027
3028 /* Create {0, X, X*2, X*3, ...}. */
3029 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3030 build_zero_cst (offset_type), step);
3031 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3032 }
3033
3034 /* Return the amount that should be added to a vector pointer to move
3035 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3036 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3037 vectorization. */
3038
3039 static tree
3040 vect_get_data_ptr_increment (vec_info *vinfo,
3041 dr_vec_info *dr_info, tree aggr_type,
3042 vect_memory_access_type memory_access_type)
3043 {
3044 if (memory_access_type == VMAT_INVARIANT)
3045 return size_zero_node;
3046
3047 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3048 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3049 if (tree_int_cst_sgn (step) == -1)
3050 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3051 return iv_step;
3052 }
3053
3054 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3055
3056 static bool
3057 vectorizable_bswap (vec_info *vinfo,
3058 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3059 gimple **vec_stmt, slp_tree slp_node,
3060 slp_tree *slp_op,
3061 tree vectype_in, stmt_vector_for_cost *cost_vec)
3062 {
3063 tree op, vectype;
3064 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3066 unsigned ncopies;
3067
3068 op = gimple_call_arg (stmt, 0);
3069 vectype = STMT_VINFO_VECTYPE (stmt_info);
3070 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3071
3072 /* Multiple types in SLP are handled by creating the appropriate number of
3073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3074 case of SLP. */
3075 if (slp_node)
3076 ncopies = 1;
3077 else
3078 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3079
3080 gcc_assert (ncopies >= 1);
3081
3082 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3083 if (! char_vectype)
3084 return false;
3085
3086 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3087 unsigned word_bytes;
3088 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3089 return false;
3090
3091 /* The encoding uses one stepped pattern for each byte in the word. */
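  /* For example, for a bswap32 on a 16-byte vector WORD_BYTES is 4 and the
     three explicit patterns built below expand to the selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */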
3092 vec_perm_builder elts (num_bytes, word_bytes, 3);
3093 for (unsigned i = 0; i < 3; ++i)
3094 for (unsigned j = 0; j < word_bytes; ++j)
3095 elts.quick_push ((i + 1) * word_bytes - j - 1);
3096
3097 vec_perm_indices indices (elts, 1, num_bytes);
3098 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3099 return false;
3100
3101 if (! vec_stmt)
3102 {
3103 if (slp_node
3104 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3105 {
3106 if (dump_enabled_p ())
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3108 "incompatible vector types for invariants\n");
3109 return false;
3110 }
3111
3112 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3113 DUMP_VECT_SCOPE ("vectorizable_bswap");
3114 record_stmt_cost (cost_vec,
3115 1, vector_stmt, stmt_info, 0, vect_prologue);
3116 record_stmt_cost (cost_vec,
3117 slp_node
3118 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3119 vec_perm, stmt_info, 0, vect_body);
3120 return true;
3121 }
3122
3123 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3124
3125 /* Transform. */
3126 vec<tree> vec_oprnds = vNULL;
3127 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3128 op, &vec_oprnds);
3129 /* Arguments are ready. Create the new vector stmt. */
3130 unsigned i;
3131 tree vop;
3132 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3133 {
3134 gimple *new_stmt;
3135 tree tem = make_ssa_name (char_vectype);
3136 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3137 char_vectype, vop));
3138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3139 tree tem2 = make_ssa_name (char_vectype);
3140 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3141 tem, tem, bswap_vconst);
3142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3143 tem = make_ssa_name (vectype);
3144 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3145 vectype, tem2));
3146 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3147 if (slp_node)
3148 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3149 else
3150 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3151 }
3152
3153 if (!slp_node)
3154 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3155
3156 vec_oprnds.release ();
3157 return true;
3158 }
3159
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162 in a single step. On success, store the binary pack code in
3163 *CONVERT_CODE. */
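/* For example, narrowing V2DI inputs to a V4SI output can typically be done
   in a single step with a VEC_PACK_TRUNC_EXPR of two V2DI vectors, in which
   case *CONVERT_CODE is set to that code; whether a single-step narrowing
   exists is target-dependent.  */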
3164
3165 static bool
3166 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3167 tree_code *convert_code)
3168 {
3169 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3170 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3171 return false;
3172
3173 tree_code code;
3174 int multi_step_cvt = 0;
3175 auto_vec <tree, 8> interm_types;
3176 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3177 &code, &multi_step_cvt, &interm_types)
3178 || multi_step_cvt)
3179 return false;
3180
3181 *convert_code = code;
3182 return true;
3183 }
3184
3185 /* Function vectorizable_call.
3186
3187 Check if STMT_INFO performs a function call that can be vectorized.
3188 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3190 Return true if STMT_INFO is vectorizable in this way. */
3191
3192 static bool
3193 vectorizable_call (vec_info *vinfo,
3194 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3195 gimple **vec_stmt, slp_tree slp_node,
3196 stmt_vector_for_cost *cost_vec)
3197 {
3198 gcall *stmt;
3199 tree vec_dest;
3200 tree scalar_dest;
3201 tree op;
3202 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3203 tree vectype_out, vectype_in;
3204 poly_uint64 nunits_in;
3205 poly_uint64 nunits_out;
3206 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3207 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3208 tree fndecl, new_temp, rhs_type;
3209 enum vect_def_type dt[4]
3210 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3211 vect_unknown_def_type };
3212 tree vectypes[ARRAY_SIZE (dt)] = {};
3213 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3214 int ndts = ARRAY_SIZE (dt);
3215 int ncopies, j;
3216 auto_vec<tree, 8> vargs;
3217 enum { NARROW, NONE, WIDEN } modifier;
3218 size_t i, nargs;
3219 tree lhs;
3220
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3222 return false;
3223
3224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3225 && ! vec_stmt)
3226 return false;
3227
3228 /* Is STMT_INFO a vectorizable call? */
3229 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3230 if (!stmt)
3231 return false;
3232
3233 if (gimple_call_internal_p (stmt)
3234 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3235 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3236 /* Handled by vectorizable_load and vectorizable_store. */
3237 return false;
3238
3239 if (gimple_call_lhs (stmt) == NULL_TREE
3240 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3241 return false;
3242
3243 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3244
3245 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3246
3247 /* Process function arguments. */
3248 rhs_type = NULL_TREE;
3249 vectype_in = NULL_TREE;
3250 nargs = gimple_call_num_args (stmt);
3251
3252 /* Bail out if the function has more than four arguments; we do not have
3253 interesting builtin functions to vectorize with more than two arguments,
3254 except for fma. Having no arguments is also not good. */
3255 if (nargs == 0 || nargs > 4)
3256 return false;
3257
3258 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3259 combined_fn cfn = gimple_call_combined_fn (stmt);
3260 if (cfn == CFN_GOMP_SIMD_LANE)
3261 {
3262 nargs = 0;
3263 rhs_type = unsigned_type_node;
3264 }
3265
3266 int mask_opno = -1;
3267 if (internal_fn_p (cfn))
3268 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3269
3270 for (i = 0; i < nargs; i++)
3271 {
3272 if ((int) i == mask_opno)
3273 {
3274 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3275 &op, &slp_op[i], &dt[i], &vectypes[i]))
3276 return false;
3277 continue;
3278 }
3279
3280 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3281 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3282 {
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "use not simple.\n");
3286 return false;
3287 }
3288
3289 /* We can only handle calls with arguments of the same type. */
3290 if (rhs_type
3291 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3292 {
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295 "argument types differ.\n");
3296 return false;
3297 }
3298 if (!rhs_type)
3299 rhs_type = TREE_TYPE (op);
3300
3301 if (!vectype_in)
3302 vectype_in = vectypes[i];
3303 else if (vectypes[i]
3304 && !types_compatible_p (vectypes[i], vectype_in))
3305 {
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "argument vector types differ.\n");
3309 return false;
3310 }
3311 }
3312 /* If all arguments are external or constant defs, infer the vector type
3313 from the scalar type. */
3314 if (!vectype_in)
3315 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3316 if (vec_stmt)
3317 gcc_assert (vectype_in);
3318 if (!vectype_in)
3319 {
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 "no vectype for scalar type %T\n", rhs_type);
3323
3324 return false;
3325 }
3326 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3327 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3328 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3329 by a pack of the two vectors into an SI vector. We would need
3330 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3331 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3332 {
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3335 "mismatched vector sizes %T and %T\n",
3336 vectype_in, vectype_out);
3337 return false;
3338 }
3339
3340 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3341 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3342 {
3343 if (dump_enabled_p ())
3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3345 "mixed mask and nonmask vector types\n");
3346 return false;
3347 }
3348
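/* The modifier computed below classifies the call by comparing lane
counts: e.g. a call taking V2DI operands and producing a V4SI result
is a NARROW call, in which the results of two vector calls are packed
into a single output vector.  */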
3349 /* FORNOW */
3350 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3351 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3352 if (known_eq (nunits_in * 2, nunits_out))
3353 modifier = NARROW;
3354 else if (known_eq (nunits_out, nunits_in))
3355 modifier = NONE;
3356 else if (known_eq (nunits_out * 2, nunits_in))
3357 modifier = WIDEN;
3358 else
3359 return false;
3360
3361 /* We only handle functions that do not read or clobber memory. */
3362 if (gimple_vuse (stmt))
3363 {
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 "function reads from or writes to memory.\n");
3367 return false;
3368 }
3369
3370 /* For now, we only vectorize functions if a target specific builtin
3371 is available. TODO -- in some cases, it might be profitable to
3372 insert the calls for pieces of the vector, in order to be able
3373 to vectorize other operations in the loop. */
3374 fndecl = NULL_TREE;
3375 internal_fn ifn = IFN_LAST;
3376 tree callee = gimple_call_fndecl (stmt);
3377
3378 /* First try using an internal function. */
3379 tree_code convert_code = ERROR_MARK;
3380 if (cfn != CFN_LAST
3381 && (modifier == NONE
3382 || (modifier == NARROW
3383 && simple_integer_narrowing (vectype_out, vectype_in,
3384 &convert_code))))
3385 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3386 vectype_in);
3387
3388 /* If that fails, try asking for a target-specific built-in function. */
3389 if (ifn == IFN_LAST)
3390 {
3391 if (cfn != CFN_LAST)
3392 fndecl = targetm.vectorize.builtin_vectorized_function
3393 (cfn, vectype_out, vectype_in);
3394 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3395 fndecl = targetm.vectorize.builtin_md_vectorized_function
3396 (callee, vectype_out, vectype_in);
3397 }
3398
3399 if (ifn == IFN_LAST && !fndecl)
3400 {
3401 if (cfn == CFN_GOMP_SIMD_LANE
3402 && !slp_node
3403 && loop_vinfo
3404 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3405 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3406 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3407 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3408 {
3409 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3410 { 0, 1, 2, ... vf - 1 } vector. */
3411 gcc_assert (nargs == 0);
3412 }
3413 else if (modifier == NONE
3414 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3415 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3416 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3417 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3418 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3419 slp_op, vectype_in, cost_vec);
3420 else
3421 {
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3424 "function is not vectorizable.\n");
3425 return false;
3426 }
3427 }
3428
3429 if (slp_node)
3430 ncopies = 1;
3431 else if (modifier == NARROW && ifn == IFN_LAST)
3432 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3433 else
3434 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3435
3436 /* Sanity check: make sure that at least one copy of the vectorized stmt
3437 needs to be generated. */
3438 gcc_assert (ncopies >= 1);
3439
3440 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3441 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3442 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3443 if (!vec_stmt) /* transformation not required. */
3444 {
3445 if (slp_node)
3446 for (i = 0; i < nargs; ++i)
3447 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3448 {
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "incompatible vector types for invariants\n");
3452 return false;
3453 }
3454 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3455 DUMP_VECT_SCOPE ("vectorizable_call");
3456 vect_model_simple_cost (vinfo, stmt_info,
3457 ncopies, dt, ndts, slp_node, cost_vec);
3458 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3459 record_stmt_cost (cost_vec, ncopies / 2,
3460 vec_promote_demote, stmt_info, 0, vect_body);
3461
3462 if (loop_vinfo
3463 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3464 && (reduc_idx >= 0 || mask_opno >= 0))
3465 {
3466 if (reduc_idx >= 0
3467 && (cond_fn == IFN_LAST
3468 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3469 OPTIMIZE_FOR_SPEED)))
3470 {
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3473 "can't use a fully-masked loop because no"
3474 " conditional operation is available.\n");
3475 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3476 }
3477 else
3478 {
3479 unsigned int nvectors
3480 = (slp_node
3481 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3482 : ncopies);
3483 tree scalar_mask = NULL_TREE;
3484 if (mask_opno >= 0)
3485 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3486 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3487 vectype_out, scalar_mask);
3488 }
3489 }
3490 return true;
3491 }
3492
3493 /* Transform. */
3494
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3497
3498 /* Handle def. */
3499 scalar_dest = gimple_call_lhs (stmt);
3500 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3501
3502 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3503 unsigned int vect_nargs = nargs;
3504 if (masked_loop_p && reduc_idx >= 0)
3505 {
3506 ifn = cond_fn;
3507 vect_nargs += 2;
3508 }
3509
3510 if (modifier == NONE || ifn != IFN_LAST)
3511 {
3512 tree prev_res = NULL_TREE;
3513 vargs.safe_grow (vect_nargs, true);
3514 auto_vec<vec<tree> > vec_defs (nargs);
3515 for (j = 0; j < ncopies; ++j)
3516 {
3517 /* Build argument list for the vectorized call. */
3518 if (slp_node)
3519 {
3520 vec<tree> vec_oprnds0;
3521
3522 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3523 vec_oprnds0 = vec_defs[0];
3524
3525 /* Arguments are ready. Create the new vector stmt. */
3526 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3527 {
3528 int varg = 0;
3529 if (masked_loop_p && reduc_idx >= 0)
3530 {
3531 unsigned int vec_num = vec_oprnds0.length ();
3532 /* Always true for SLP. */
3533 gcc_assert (ncopies == 1);
3534 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3535 vectype_out, i);
3536 }
3537 size_t k;
3538 for (k = 0; k < nargs; k++)
3539 {
3540 vec<tree> vec_oprndsk = vec_defs[k];
3541 vargs[varg++] = vec_oprndsk[i];
3542 }
3543 if (masked_loop_p && reduc_idx >= 0)
3544 vargs[varg++] = vargs[reduc_idx + 1];
3545 gimple *new_stmt;
3546 if (modifier == NARROW)
3547 {
3548 /* We don't define any narrowing conditional functions
3549 at present. */
3550 gcc_assert (mask_opno < 0);
3551 tree half_res = make_ssa_name (vectype_in);
3552 gcall *call
3553 = gimple_build_call_internal_vec (ifn, vargs);
3554 gimple_call_set_lhs (call, half_res);
3555 gimple_call_set_nothrow (call, true);
3556 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3557 if ((i & 1) == 0)
3558 {
3559 prev_res = half_res;
3560 continue;
3561 }
3562 new_temp = make_ssa_name (vec_dest);
3563 new_stmt = gimple_build_assign (new_temp, convert_code,
3564 prev_res, half_res);
3565 vect_finish_stmt_generation (vinfo, stmt_info,
3566 new_stmt, gsi);
3567 }
3568 else
3569 {
3570 if (mask_opno >= 0 && masked_loop_p)
3571 {
3572 unsigned int vec_num = vec_oprnds0.length ();
3573 /* Always true for SLP. */
3574 gcc_assert (ncopies == 1);
3575 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3576 vectype_out, i);
3577 vargs[mask_opno] = prepare_vec_mask
3578 (loop_vinfo, TREE_TYPE (mask), mask,
3579 vargs[mask_opno], gsi);
3580 }
3581
3582 gcall *call;
3583 if (ifn != IFN_LAST)
3584 call = gimple_build_call_internal_vec (ifn, vargs);
3585 else
3586 call = gimple_build_call_vec (fndecl, vargs);
3587 new_temp = make_ssa_name (vec_dest, call);
3588 gimple_call_set_lhs (call, new_temp);
3589 gimple_call_set_nothrow (call, true);
3590 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3591 new_stmt = call;
3592 }
3593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3594 }
3595 continue;
3596 }
3597
3598 int varg = 0;
3599 if (masked_loop_p && reduc_idx >= 0)
3600 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3601 vectype_out, j);
3602 for (i = 0; i < nargs; i++)
3603 {
3604 op = gimple_call_arg (stmt, i);
3605 if (j == 0)
3606 {
3607 vec_defs.quick_push (vNULL);
3608 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3609 op, &vec_defs[i],
3610 vectypes[i]);
3611 }
3612 vargs[varg++] = vec_defs[i][j];
3613 }
3614 if (masked_loop_p && reduc_idx >= 0)
3615 vargs[varg++] = vargs[reduc_idx + 1];
3616
3617 if (mask_opno >= 0 && masked_loop_p)
3618 {
3619 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3620 vectype_out, j);
3621 vargs[mask_opno]
3622 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3623 vargs[mask_opno], gsi);
3624 }
3625
3626 gimple *new_stmt;
3627 if (cfn == CFN_GOMP_SIMD_LANE)
3628 {
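/* Expand IFN_GOMP_SIMD_LANE to the lane-index vector for this copy,
i.e. { j * nunits_out, j * nunits_out + 1, ... }; e.g. with four
lanes the second copy (j == 1) uses { 4, 5, 6, 7 }.  */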
3629 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3630 tree new_var
3631 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3632 gimple *init_stmt = gimple_build_assign (new_var, cst);
3633 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3634 new_temp = make_ssa_name (vec_dest);
3635 new_stmt = gimple_build_assign (new_temp, new_var);
3636 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3637 }
3638 else if (modifier == NARROW)
3639 {
3640 /* We don't define any narrowing conditional functions at
3641 present. */
3642 gcc_assert (mask_opno < 0);
3643 tree half_res = make_ssa_name (vectype_in);
3644 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3645 gimple_call_set_lhs (call, half_res);
3646 gimple_call_set_nothrow (call, true);
3647 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3648 if ((j & 1) == 0)
3649 {
3650 prev_res = half_res;
3651 continue;
3652 }
3653 new_temp = make_ssa_name (vec_dest);
3654 new_stmt = gimple_build_assign (new_temp, convert_code,
3655 prev_res, half_res);
3656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3657 }
3658 else
3659 {
3660 gcall *call;
3661 if (ifn != IFN_LAST)
3662 call = gimple_build_call_internal_vec (ifn, vargs);
3663 else
3664 call = gimple_build_call_vec (fndecl, vargs);
3665 new_temp = make_ssa_name (vec_dest, call);
3666 gimple_call_set_lhs (call, new_temp);
3667 gimple_call_set_nothrow (call, true);
3668 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3669 new_stmt = call;
3670 }
3671
3672 if (j == (modifier == NARROW ? 1 : 0))
3673 *vec_stmt = new_stmt;
3674 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3675 }
3676 for (i = 0; i < nargs; i++)
3677 {
3678 vec<tree> vec_oprndsi = vec_defs[i];
3679 vec_oprndsi.release ();
3680 }
3681 }
3682 else if (modifier == NARROW)
3683 {
3684 auto_vec<vec<tree> > vec_defs (nargs);
3685 /* We don't define any narrowing conditional functions at present. */
3686 gcc_assert (mask_opno < 0);
3687 for (j = 0; j < ncopies; ++j)
3688 {
3689 /* Build argument list for the vectorized call. */
3690 if (j == 0)
3691 vargs.create (nargs * 2);
3692 else
3693 vargs.truncate (0);
3694
3695 if (slp_node)
3696 {
3697 vec<tree> vec_oprnds0;
3698
3699 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3700 vec_oprnds0 = vec_defs[0];
3701
3702 /* Arguments are ready. Create the new vector stmt. */
3703 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3704 {
3705 size_t k;
3706 vargs.truncate (0);
3707 for (k = 0; k < nargs; k++)
3708 {
3709 vec<tree> vec_oprndsk = vec_defs[k];
3710 vargs.quick_push (vec_oprndsk[i]);
3711 vargs.quick_push (vec_oprndsk[i + 1]);
3712 }
3713 gcall *call;
3714 if (ifn != IFN_LAST)
3715 call = gimple_build_call_internal_vec (ifn, vargs);
3716 else
3717 call = gimple_build_call_vec (fndecl, vargs);
3718 new_temp = make_ssa_name (vec_dest, call);
3719 gimple_call_set_lhs (call, new_temp);
3720 gimple_call_set_nothrow (call, true);
3721 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3722 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3723 }
3724 continue;
3725 }
3726
3727 for (i = 0; i < nargs; i++)
3728 {
3729 op = gimple_call_arg (stmt, i);
3730 if (j == 0)
3731 {
3732 vec_defs.quick_push (vNULL);
3733 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3734 op, &vec_defs[i], vectypes[i]);
3735 }
3736 vec_oprnd0 = vec_defs[i][2*j];
3737 vec_oprnd1 = vec_defs[i][2*j+1];
3738
3739 vargs.quick_push (vec_oprnd0);
3740 vargs.quick_push (vec_oprnd1);
3741 }
3742
3743 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3744 new_temp = make_ssa_name (vec_dest, new_stmt);
3745 gimple_call_set_lhs (new_stmt, new_temp);
3746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3747
3748 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3749 }
3750
3751 if (!slp_node)
3752 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3753
3754 for (i = 0; i < nargs; i++)
3755 {
3756 vec<tree> vec_oprndsi = vec_defs[i];
3757 vec_oprndsi.release ();
3758 }
3759 }
3760 else
3761 /* No current target implements this case. */
3762 return false;
3763
3764 vargs.release ();
3765
3766 /* Being a call, the statement in STMT might not be removable by DCE.
3767 We cannot remove it here either, due to the way the ssa name
3768 it defines is mapped to the new definition, so just replace the
3769 rhs of the statement with something harmless. */
3770
3771 if (slp_node)
3772 return true;
3773
3774 stmt_info = vect_orig_stmt (stmt_info);
3775 lhs = gimple_get_lhs (stmt_info->stmt);
3776
3777 gassign *new_stmt
3778 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3779 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3780
3781 return true;
3782 }
3783
3784
3785 struct simd_call_arg_info
3786 {
3787 tree vectype;
3788 tree op;
3789 HOST_WIDE_INT linear_step;
3790 enum vect_def_type dt;
3791 unsigned int align;
3792 bool simd_lane_linear;
3793 };
3794
3795 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3796 is linear within a simd lane (but not within the whole loop), note it in
3797 *ARGINFO. */
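/* For example (a sketch; SSA names are invented for illustration),
given

_1 = .GOMP_SIMD_LANE (simduid.0_5);
_2 = (sizetype) _1;
_3 = _2 * 4;
op_4 = &array + _3;

OP is linear within the simd lane, with base &array and
linear_step 4.  */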
3798
3799 static void
3800 vect_simd_lane_linear (tree op, class loop *loop,
3801 struct simd_call_arg_info *arginfo)
3802 {
3803 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3804
3805 if (!is_gimple_assign (def_stmt)
3806 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3807 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3808 return;
3809
3810 tree base = gimple_assign_rhs1 (def_stmt);
3811 HOST_WIDE_INT linear_step = 0;
3812 tree v = gimple_assign_rhs2 (def_stmt);
3813 while (TREE_CODE (v) == SSA_NAME)
3814 {
3815 tree t;
3816 def_stmt = SSA_NAME_DEF_STMT (v);
3817 if (is_gimple_assign (def_stmt))
3818 switch (gimple_assign_rhs_code (def_stmt))
3819 {
3820 case PLUS_EXPR:
3821 t = gimple_assign_rhs2 (def_stmt);
3822 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3823 return;
3824 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3825 v = gimple_assign_rhs1 (def_stmt);
3826 continue;
3827 case MULT_EXPR:
3828 t = gimple_assign_rhs2 (def_stmt);
3829 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3830 return;
3831 linear_step = tree_to_shwi (t);
3832 v = gimple_assign_rhs1 (def_stmt);
3833 continue;
3834 CASE_CONVERT:
3835 t = gimple_assign_rhs1 (def_stmt);
3836 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3837 || (TYPE_PRECISION (TREE_TYPE (v))
3838 < TYPE_PRECISION (TREE_TYPE (t))))
3839 return;
3840 if (!linear_step)
3841 linear_step = 1;
3842 v = t;
3843 continue;
3844 default:
3845 return;
3846 }
3847 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3848 && loop->simduid
3849 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3850 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3851 == loop->simduid))
3852 {
3853 if (!linear_step)
3854 linear_step = 1;
3855 arginfo->linear_step = linear_step;
3856 arginfo->op = base;
3857 arginfo->simd_lane_linear = true;
3858 return;
3859 }
3860 }
3861 }
3862
3863 /* Return the number of elements in vector type VECTYPE, which is associated
3864 with a SIMD clone. At present these vectors always have a constant
3865 length. */
3866
3867 static unsigned HOST_WIDE_INT
3868 simd_clone_subparts (tree vectype)
3869 {
3870 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3871 }
3872
3873 /* Function vectorizable_simd_clone_call.
3874
3875 Check if STMT_INFO performs a function call that can be vectorized
3876 by calling a simd clone of the function.
3877 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3878 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3879 Return true if STMT_INFO is vectorizable in this way. */
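/* For example (illustrative only; the clone name follows the target's
vector function ABI mangling): with a vectorization factor of 4, a
call

x_1 = foo (a_2);

for which a suitable "#pragma omp declare simd" clone of foo exists
may be replaced by

vect_x_1.8 = _ZGVbN4v_foo (vect_a_2.7);

i.e. a call to the 4-lane clone taking a vector argument.  */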
3880
3881 static bool
3882 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3883 gimple_stmt_iterator *gsi,
3884 gimple **vec_stmt, slp_tree slp_node,
3885 stmt_vector_for_cost *)
3886 {
3887 tree vec_dest;
3888 tree scalar_dest;
3889 tree op, type;
3890 tree vec_oprnd0 = NULL_TREE;
3891 tree vectype;
3892 poly_uint64 nunits;
3893 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3894 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3895 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3896 tree fndecl, new_temp;
3897 int ncopies, j;
3898 auto_vec<simd_call_arg_info> arginfo;
3899 vec<tree> vargs = vNULL;
3900 size_t i, nargs;
3901 tree lhs, rtype, ratype;
3902 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3903
3904 /* Is STMT a vectorizable call? */
3905 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3906 if (!stmt)
3907 return false;
3908
3909 fndecl = gimple_call_fndecl (stmt);
3910 if (fndecl == NULL_TREE)
3911 return false;
3912
3913 struct cgraph_node *node = cgraph_node::get (fndecl);
3914 if (node == NULL || node->simd_clones == NULL)
3915 return false;
3916
3917 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3918 return false;
3919
3920 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3921 && ! vec_stmt)
3922 return false;
3923
3924 if (gimple_call_lhs (stmt)
3925 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3926 return false;
3927
3928 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3929
3930 vectype = STMT_VINFO_VECTYPE (stmt_info);
3931
3932 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3933 return false;
3934
3935 /* FORNOW */
3936 if (slp_node)
3937 return false;
3938
3939 /* Process function arguments. */
3940 nargs = gimple_call_num_args (stmt);
3941
3942 /* Bail out if the function has zero arguments. */
3943 if (nargs == 0)
3944 return false;
3945
3946 arginfo.reserve (nargs, true);
3947
3948 for (i = 0; i < nargs; i++)
3949 {
3950 simd_call_arg_info thisarginfo;
3951 affine_iv iv;
3952
3953 thisarginfo.linear_step = 0;
3954 thisarginfo.align = 0;
3955 thisarginfo.op = NULL_TREE;
3956 thisarginfo.simd_lane_linear = false;
3957
3958 op = gimple_call_arg (stmt, i);
3959 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3960 &thisarginfo.vectype)
3961 || thisarginfo.dt == vect_uninitialized_def)
3962 {
3963 if (dump_enabled_p ())
3964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3965 "use not simple.\n");
3966 return false;
3967 }
3968
3969 if (thisarginfo.dt == vect_constant_def
3970 || thisarginfo.dt == vect_external_def)
3971 gcc_assert (thisarginfo.vectype == NULL_TREE);
3972 else
3973 {
3974 gcc_assert (thisarginfo.vectype != NULL_TREE);
3975 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3976 {
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 "vector mask arguments are not supported\n");
3980 return false;
3981 }
3982 }
3983
3984 /* For linear arguments, the analyze phase should have saved
3985 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3986 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3987 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3988 {
3989 gcc_assert (vec_stmt);
3990 thisarginfo.linear_step
3991 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3992 thisarginfo.op
3993 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3994 thisarginfo.simd_lane_linear
3995 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3996 == boolean_true_node);
3997 /* If the loop has been peeled for alignment, adjust the recorded base. */
3998 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3999 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4000 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4001 {
4002 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4003 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4004 tree opt = TREE_TYPE (thisarginfo.op);
4005 bias = fold_convert (TREE_TYPE (step), bias);
4006 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4007 thisarginfo.op
4008 = fold_build2 (POINTER_TYPE_P (opt)
4009 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4010 thisarginfo.op, bias);
4011 }
4012 }
4013 else if (!vec_stmt
4014 && thisarginfo.dt != vect_constant_def
4015 && thisarginfo.dt != vect_external_def
4016 && loop_vinfo
4017 && TREE_CODE (op) == SSA_NAME
4018 && simple_iv (loop, loop_containing_stmt (stmt), op,
4019 &iv, false)
4020 && tree_fits_shwi_p (iv.step))
4021 {
4022 thisarginfo.linear_step = tree_to_shwi (iv.step);
4023 thisarginfo.op = iv.base;
4024 }
4025 else if ((thisarginfo.dt == vect_constant_def
4026 || thisarginfo.dt == vect_external_def)
4027 && POINTER_TYPE_P (TREE_TYPE (op)))
4028 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4029 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4030 linear too. */
4031 if (POINTER_TYPE_P (TREE_TYPE (op))
4032 && !thisarginfo.linear_step
4033 && !vec_stmt
4034 && thisarginfo.dt != vect_constant_def
4035 && thisarginfo.dt != vect_external_def
4036 && loop_vinfo
4037 && !slp_node
4038 && TREE_CODE (op) == SSA_NAME)
4039 vect_simd_lane_linear (op, loop, &thisarginfo);
4040
4041 arginfo.quick_push (thisarginfo);
4042 }
4043
4044 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4045 if (!vf.is_constant ())
4046 {
4047 if (dump_enabled_p ())
4048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4049 "not considering SIMD clones; not yet supported"
4050 " for variable-width vectors.\n");
4051 return false;
4052 }
4053
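/* Choose the "best" simd clone for this call.  Clones whose simdlen
does not divide the vectorization factor, or whose argument count
differs, are skipped; among the rest, a badness score penalizes
clones that need several calls per vector iteration, inbranch
clones, target reluctance, and argument kinds or alignments that do
not match what is known about the actual arguments.  */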
4054 unsigned int badness = 0;
4055 struct cgraph_node *bestn = NULL;
4056 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4057 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4058 else
4059 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4060 n = n->simdclone->next_clone)
4061 {
4062 unsigned int this_badness = 0;
4063 unsigned int num_calls;
4064 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4065 || n->simdclone->nargs != nargs)
4066 continue;
4067 if (num_calls != 1)
4068 this_badness += exact_log2 (num_calls) * 4096;
4069 if (n->simdclone->inbranch)
4070 this_badness += 8192;
4071 int target_badness = targetm.simd_clone.usable (n);
4072 if (target_badness < 0)
4073 continue;
4074 this_badness += target_badness * 512;
4075 /* FORNOW: code to pass the mask argument still has to be added. */
4076 if (n->simdclone->inbranch)
4077 continue;
4078 for (i = 0; i < nargs; i++)
4079 {
4080 switch (n->simdclone->args[i].arg_type)
4081 {
4082 case SIMD_CLONE_ARG_TYPE_VECTOR:
4083 if (!useless_type_conversion_p
4084 (n->simdclone->args[i].orig_type,
4085 TREE_TYPE (gimple_call_arg (stmt, i))))
4086 i = -1;
4087 else if (arginfo[i].dt == vect_constant_def
4088 || arginfo[i].dt == vect_external_def
4089 || arginfo[i].linear_step)
4090 this_badness += 64;
4091 break;
4092 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4093 if (arginfo[i].dt != vect_constant_def
4094 && arginfo[i].dt != vect_external_def)
4095 i = -1;
4096 break;
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4099 if (arginfo[i].dt == vect_constant_def
4100 || arginfo[i].dt == vect_external_def
4101 || (arginfo[i].linear_step
4102 != n->simdclone->args[i].linear_step))
4103 i = -1;
4104 break;
4105 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4106 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4107 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4108 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4111 /* FORNOW */
4112 i = -1;
4113 break;
4114 case SIMD_CLONE_ARG_TYPE_MASK:
4115 gcc_unreachable ();
4116 }
4117 if (i == (size_t) -1)
4118 break;
4119 if (n->simdclone->args[i].alignment > arginfo[i].align)
4120 {
4121 i = -1;
4122 break;
4123 }
4124 if (arginfo[i].align)
4125 this_badness += (exact_log2 (arginfo[i].align)
4126 - exact_log2 (n->simdclone->args[i].alignment));
4127 }
4128 if (i == (size_t) -1)
4129 continue;
4130 if (bestn == NULL || this_badness < badness)
4131 {
4132 bestn = n;
4133 badness = this_badness;
4134 }
4135 }
4136
4137 if (bestn == NULL)
4138 return false;
4139
4140 for (i = 0; i < nargs; i++)
4141 if ((arginfo[i].dt == vect_constant_def
4142 || arginfo[i].dt == vect_external_def)
4143 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4144 {
4145 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4146 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4147 slp_node);
4148 if (arginfo[i].vectype == NULL
4149 || !constant_multiple_p (bestn->simdclone->simdlen,
4150 simd_clone_subparts (arginfo[i].vectype)))
4151 return false;
4152 }
4153
4154 fndecl = bestn->decl;
4155 nunits = bestn->simdclone->simdlen;
4156 ncopies = vector_unroll_factor (vf, nunits);
4157
4158 /* If the function isn't const, only allow it in simd loops where the user
4159 has asserted that at least nunits consecutive iterations can be
4160 performed using SIMD instructions. */
4161 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4162 && gimple_vuse (stmt))
4163 return false;
4164
4165 /* Sanity check: make sure that at least one copy of the vectorized stmt
4166 needs to be generated. */
4167 gcc_assert (ncopies >= 1);
4168
4169 if (!vec_stmt) /* transformation not required. */
4170 {
4171 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4172 for (i = 0; i < nargs; i++)
4173 if ((bestn->simdclone->args[i].arg_type
4174 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4175 || (bestn->simdclone->args[i].arg_type
4176 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4177 {
4178 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4179 + 1,
4180 true);
4181 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4182 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4183 ? size_type_node : TREE_TYPE (arginfo[i].op);
4184 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4185 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4186 tree sll = arginfo[i].simd_lane_linear
4187 ? boolean_true_node : boolean_false_node;
4188 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4189 }
4190 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4191 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4192 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4193 dt, slp_node, cost_vec); */
4194 return true;
4195 }
4196
4197 /* Transform. */
4198
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4201
4202 /* Handle def. */
4203 scalar_dest = gimple_call_lhs (stmt);
4204 vec_dest = NULL_TREE;
4205 rtype = NULL_TREE;
4206 ratype = NULL_TREE;
4207 if (scalar_dest)
4208 {
4209 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4210 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4211 if (TREE_CODE (rtype) == ARRAY_TYPE)
4212 {
4213 ratype = rtype;
4214 rtype = TREE_TYPE (ratype);
4215 }
4216 }
4217
4218 auto_vec<vec<tree> > vec_oprnds;
4219 auto_vec<unsigned> vec_oprnds_i;
4220 vec_oprnds.safe_grow_cleared (nargs, true);
4221 vec_oprnds_i.safe_grow_cleared (nargs, true);
4222 for (j = 0; j < ncopies; ++j)
4223 {
4224 /* Build argument list for the vectorized call. */
4225 if (j == 0)
4226 vargs.create (nargs);
4227 else
4228 vargs.truncate (0);
4229
4230 for (i = 0; i < nargs; i++)
4231 {
4232 unsigned int k, l, m, o;
4233 tree atype;
4234 op = gimple_call_arg (stmt, i);
4235 switch (bestn->simdclone->args[i].arg_type)
4236 {
4237 case SIMD_CLONE_ARG_TYPE_VECTOR:
4238 atype = bestn->simdclone->args[i].vector_type;
4239 o = vector_unroll_factor (nunits,
4240 simd_clone_subparts (atype));
4241 for (m = j * o; m < (j + 1) * o; m++)
4242 {
4243 if (simd_clone_subparts (atype)
4244 < simd_clone_subparts (arginfo[i].vectype))
4245 {
4246 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4247 k = (simd_clone_subparts (arginfo[i].vectype)
4248 / simd_clone_subparts (atype));
4249 gcc_assert ((k & (k - 1)) == 0);
4250 if (m == 0)
4251 {
4252 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4253 ncopies * o / k, op,
4254 &vec_oprnds[i]);
4255 vec_oprnds_i[i] = 0;
4256 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4257 }
4258 else
4259 {
4260 vec_oprnd0 = arginfo[i].op;
4261 if ((m & (k - 1)) == 0)
4262 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4263 }
4264 arginfo[i].op = vec_oprnd0;
4265 vec_oprnd0
4266 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4267 bitsize_int (prec),
4268 bitsize_int ((m & (k - 1)) * prec));
4269 gassign *new_stmt
4270 = gimple_build_assign (make_ssa_name (atype),
4271 vec_oprnd0);
4272 vect_finish_stmt_generation (vinfo, stmt_info,
4273 new_stmt, gsi);
4274 vargs.safe_push (gimple_assign_lhs (new_stmt));
4275 }
4276 else
4277 {
4278 k = (simd_clone_subparts (atype)
4279 / simd_clone_subparts (arginfo[i].vectype));
4280 gcc_assert ((k & (k - 1)) == 0);
4281 vec<constructor_elt, va_gc> *ctor_elts;
4282 if (k != 1)
4283 vec_alloc (ctor_elts, k);
4284 else
4285 ctor_elts = NULL;
4286 for (l = 0; l < k; l++)
4287 {
4288 if (m == 0 && l == 0)
4289 {
4290 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4291 k * o * ncopies,
4292 op,
4293 &vec_oprnds[i]);
4294 vec_oprnds_i[i] = 0;
4295 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4296 }
4297 else
4298 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4299 arginfo[i].op = vec_oprnd0;
4300 if (k == 1)
4301 break;
4302 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4303 vec_oprnd0);
4304 }
4305 if (k == 1)
4306 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4307 atype))
4308 {
4309 vec_oprnd0
4310 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4311 gassign *new_stmt
4312 = gimple_build_assign (make_ssa_name (atype),
4313 vec_oprnd0);
4314 vect_finish_stmt_generation (vinfo, stmt_info,
4315 new_stmt, gsi);
4316 vargs.safe_push (gimple_assign_lhs (new_stmt));
4317 }
4318 else
4319 vargs.safe_push (vec_oprnd0);
4320 else
4321 {
4322 vec_oprnd0 = build_constructor (atype, ctor_elts);
4323 gassign *new_stmt
4324 = gimple_build_assign (make_ssa_name (atype),
4325 vec_oprnd0);
4326 vect_finish_stmt_generation (vinfo, stmt_info,
4327 new_stmt, gsi);
4328 vargs.safe_push (gimple_assign_lhs (new_stmt));
4329 }
4330 }
4331 }
4332 break;
4333 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4334 vargs.safe_push (op);
4335 break;
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4338 if (j == 0)
4339 {
4340 gimple_seq stmts;
4341 arginfo[i].op
4342 = force_gimple_operand (unshare_expr (arginfo[i].op),
4343 &stmts, true, NULL_TREE);
4344 if (stmts != NULL)
4345 {
4346 basic_block new_bb;
4347 edge pe = loop_preheader_edge (loop);
4348 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4349 gcc_assert (!new_bb);
4350 }
4351 if (arginfo[i].simd_lane_linear)
4352 {
4353 vargs.safe_push (arginfo[i].op);
4354 break;
4355 }
4356 tree phi_res = copy_ssa_name (op);
4357 gphi *new_phi = create_phi_node (phi_res, loop->header);
4358 add_phi_arg (new_phi, arginfo[i].op,
4359 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4360 enum tree_code code
4361 = POINTER_TYPE_P (TREE_TYPE (op))
4362 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4363 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4364 ? sizetype : TREE_TYPE (op);
4365 poly_widest_int cst
4366 = wi::mul (bestn->simdclone->args[i].linear_step,
4367 ncopies * nunits);
4368 tree tcst = wide_int_to_tree (type, cst);
4369 tree phi_arg = copy_ssa_name (op);
4370 gassign *new_stmt
4371 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4372 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4373 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4374 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4375 UNKNOWN_LOCATION);
4376 arginfo[i].op = phi_res;
4377 vargs.safe_push (phi_res);
4378 }
4379 else
4380 {
4381 enum tree_code code
4382 = POINTER_TYPE_P (TREE_TYPE (op))
4383 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4384 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4385 ? sizetype : TREE_TYPE (op);
4386 poly_widest_int cst
4387 = wi::mul (bestn->simdclone->args[i].linear_step,
4388 j * nunits);
4389 tree tcst = wide_int_to_tree (type, cst);
4390 new_temp = make_ssa_name (TREE_TYPE (op));
4391 gassign *new_stmt
4392 = gimple_build_assign (new_temp, code,
4393 arginfo[i].op, tcst);
4394 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4395 vargs.safe_push (new_temp);
4396 }
4397 break;
4398 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4399 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4400 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4401 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4402 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4403 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4404 default:
4405 gcc_unreachable ();
4406 }
4407 }
4408
4409 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4410 if (vec_dest)
4411 {
4412 gcc_assert (ratype
4413 || known_eq (simd_clone_subparts (rtype), nunits));
4414 if (ratype)
4415 new_temp = create_tmp_var (ratype);
4416 else if (useless_type_conversion_p (vectype, rtype))
4417 new_temp = make_ssa_name (vec_dest, new_call);
4418 else
4419 new_temp = make_ssa_name (rtype, new_call);
4420 gimple_call_set_lhs (new_call, new_temp);
4421 }
4422 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4423 gimple *new_stmt = new_call;
4424
4425 if (vec_dest)
4426 {
4427 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4428 {
4429 unsigned int k, l;
4430 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4431 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4432 k = vector_unroll_factor (nunits,
4433 simd_clone_subparts (vectype));
4434 gcc_assert ((k & (k - 1)) == 0);
4435 for (l = 0; l < k; l++)
4436 {
4437 tree t;
4438 if (ratype)
4439 {
4440 t = build_fold_addr_expr (new_temp);
4441 t = build2 (MEM_REF, vectype, t,
4442 build_int_cst (TREE_TYPE (t), l * bytes));
4443 }
4444 else
4445 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4446 bitsize_int (prec), bitsize_int (l * prec));
4447 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4448 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4449
4450 if (j == 0 && l == 0)
4451 *vec_stmt = new_stmt;
4452 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4453 }
4454
4455 if (ratype)
4456 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4457 continue;
4458 }
4459 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4460 {
4461 unsigned int k = (simd_clone_subparts (vectype)
4462 / simd_clone_subparts (rtype));
4463 gcc_assert ((k & (k - 1)) == 0);
4464 if ((j & (k - 1)) == 0)
4465 vec_alloc (ret_ctor_elts, k);
4466 if (ratype)
4467 {
4468 unsigned int m, o;
4469 o = vector_unroll_factor (nunits,
4470 simd_clone_subparts (rtype));
4471 for (m = 0; m < o; m++)
4472 {
4473 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4474 size_int (m), NULL_TREE, NULL_TREE);
4475 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4476 tem);
4477 vect_finish_stmt_generation (vinfo, stmt_info,
4478 new_stmt, gsi);
4479 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4480 gimple_assign_lhs (new_stmt));
4481 }
4482 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4483 }
4484 else
4485 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4486 if ((j & (k - 1)) != k - 1)
4487 continue;
4488 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4489 new_stmt
4490 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4491 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4492
4493 if ((unsigned) j == k - 1)
4494 *vec_stmt = new_stmt;
4495 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4496 continue;
4497 }
4498 else if (ratype)
4499 {
4500 tree t = build_fold_addr_expr (new_temp);
4501 t = build2 (MEM_REF, vectype, t,
4502 build_int_cst (TREE_TYPE (t), 0));
4503 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4504 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4505 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4506 }
4507 else if (!useless_type_conversion_p (vectype, rtype))
4508 {
4509 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4510 new_stmt
4511 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4512 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4513 }
4514 }
4515
4516 if (j == 0)
4517 *vec_stmt = new_stmt;
4518 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4519 }
4520
4521 for (i = 0; i < nargs; ++i)
4522 {
4523 vec<tree> oprndsi = vec_oprnds[i];
4524 oprndsi.release ();
4525 }
4526 vargs.release ();
4527
4528 /* Being a call, the statement in STMT might not be removable by DCE.
4529 We cannot remove it here either, due to the way the ssa name
4530 it defines is mapped to the new definition, so just replace the
4531 rhs of the statement with something harmless. */
4532
4533 if (slp_node)
4534 return true;
4535
4536 gimple *new_stmt;
4537 if (scalar_dest)
4538 {
4539 type = TREE_TYPE (scalar_dest);
4540 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4541 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4542 }
4543 else
4544 new_stmt = gimple_build_nop ();
4545 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4546 unlink_stmt_vdef (stmt);
4547
4548 return true;
4549 }
4550
4551
4552 /* Function vect_gen_widened_results_half
4553
4554 Create a vector stmt whose code, operand count, and result
4555 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments
4556 are VEC_OPRND0 and, for binary operations, VEC_OPRND1.  The new
4557 vector stmt is inserted at GSI.
4558 STMT_INFO is the original scalar stmt that we are
4559 vectorizing. */
4560
4561 static gimple *
4562 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4563 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4564 tree vec_dest, gimple_stmt_iterator *gsi,
4565 stmt_vec_info stmt_info)
4566 {
4567 gimple *new_stmt;
4568 tree new_temp;
4569
4570 /* Generate half of the widened result: */
4571 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4572 if (op_type != binary_op)
4573 vec_oprnd1 = NULL;
4574 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4575 new_temp = make_ssa_name (vec_dest, new_stmt);
4576 gimple_assign_set_lhs (new_stmt, new_temp);
4577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4578
4579 return new_stmt;
4580 }
4581
4582
4583 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4584 For multi-step conversions store the resulting vectors and call the function
4585 recursively. */
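/* For example (a sketch): demoting four V4SI operand vectors to a
single V16QI result takes two steps; pairs of V4SI vectors are first
packed into V8HI vectors, which are then packed into the final V16QI
vector, with VEC_PACK_TRUNC_EXPR used at each level.  */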
4586
4587 static void
4588 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4589 int multi_step_cvt,
4590 stmt_vec_info stmt_info,
4591 vec<tree> &vec_dsts,
4592 gimple_stmt_iterator *gsi,
4593 slp_tree slp_node, enum tree_code code)
4594 {
4595 unsigned int i;
4596 tree vop0, vop1, new_tmp, vec_dest;
4597
4598 vec_dest = vec_dsts.pop ();
4599
4600 for (i = 0; i < vec_oprnds->length (); i += 2)
4601 {
4602 /* Create demotion operation. */
4603 vop0 = (*vec_oprnds)[i];
4604 vop1 = (*vec_oprnds)[i + 1];
4605 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4606 new_tmp = make_ssa_name (vec_dest, new_stmt);
4607 gimple_assign_set_lhs (new_stmt, new_tmp);
4608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4609
4610 if (multi_step_cvt)
4611 /* Store the resulting vector for the next recursive call. */
4612 (*vec_oprnds)[i/2] = new_tmp;
4613 else
4614 {
4615 /* This is the last step of the conversion sequence.  Store the
4616 vectors in SLP_NODE or in the vector info of the scalar statement
4617 (or in the STMT_VINFO_RELATED_STMT chain). */
4618 if (slp_node)
4619 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4620 else
4621 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4622 }
4623 }
4624
4625 /* For multi-step demotion operations we first generate demotion operations
4626 from the source type to the intermediate types, and then combine the
4627 results (stored in VEC_OPRNDS) with a demotion operation to the
4628 destination type. */
4629 if (multi_step_cvt)
4630 {
4631 /* At each level of recursion we have half of the operands we had at the
4632 previous level. */
4633 vec_oprnds->truncate ((i+1)/2);
4634 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4635 multi_step_cvt - 1,
4636 stmt_info, vec_dsts, gsi,
4637 slp_node, VEC_PACK_TRUNC_EXPR);
4638 }
4639
4640 vec_dsts.quick_push (vec_dest);
4641 }
4642
4643
4644 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4645 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4646 STMT_INFO. For multi-step conversions store the resulting vectors and
4647 call the function recursively. */
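/* For example (a sketch): for a widening multiplication with V8HI
operands, each pair of input vectors yields a low-part and a
high-part result, e.g. via VEC_WIDEN_MULT_LO_EXPR and
VEC_WIDEN_MULT_HI_EXPR (some targets use the EVEN/ODD variants
instead), i.e. two V4SI vectors per V8HI operand pair.  */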
4648
4649 static void
4650 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4651 vec<tree> *vec_oprnds0,
4652 vec<tree> *vec_oprnds1,
4653 stmt_vec_info stmt_info, tree vec_dest,
4654 gimple_stmt_iterator *gsi,
4655 enum tree_code code1,
4656 enum tree_code code2, int op_type)
4657 {
4658 int i;
4659 tree vop0, vop1, new_tmp1, new_tmp2;
4660 gimple *new_stmt1, *new_stmt2;
4661 vec<tree> vec_tmp = vNULL;
4662
4663 vec_tmp.create (vec_oprnds0->length () * 2);
4664 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4665 {
4666 if (op_type == binary_op)
4667 vop1 = (*vec_oprnds1)[i];
4668 else
4669 vop1 = NULL_TREE;
4670
4671 /* Generate the two halves of promotion operation. */
4672 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4673 op_type, vec_dest, gsi,
4674 stmt_info);
4675 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4676 op_type, vec_dest, gsi,
4677 stmt_info);
4678 if (is_gimple_call (new_stmt1))
4679 {
4680 new_tmp1 = gimple_call_lhs (new_stmt1);
4681 new_tmp2 = gimple_call_lhs (new_stmt2);
4682 }
4683 else
4684 {
4685 new_tmp1 = gimple_assign_lhs (new_stmt1);
4686 new_tmp2 = gimple_assign_lhs (new_stmt2);
4687 }
4688
4689 /* Store the results for the next step. */
4690 vec_tmp.quick_push (new_tmp1);
4691 vec_tmp.quick_push (new_tmp2);
4692 }
4693
4694 vec_oprnds0->release ();
4695 *vec_oprnds0 = vec_tmp;
4696 }
4697
4698 /* Create vectorized promotion stmts for widening stmts using only half the
4699 potential vector size for input. */
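/* For example (a sketch): a WIDEN_PLUS_EXPR with V4SI inputs and a
V4DI result can be implemented by converting each vector input to
V4DI and performing an ordinary V4DI PLUS_EXPR, instead of unpacking
the inputs into low and high halves.  */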
4700 static void
4701 vect_create_half_widening_stmts (vec_info *vinfo,
4702 vec<tree> *vec_oprnds0,
4703 vec<tree> *vec_oprnds1,
4704 stmt_vec_info stmt_info, tree vec_dest,
4705 gimple_stmt_iterator *gsi,
4706 enum tree_code code1,
4707 int op_type)
4708 {
4709 int i;
4710 tree vop0, vop1;
4711 gimple *new_stmt1;
4712 gimple *new_stmt2;
4713 gimple *new_stmt3;
4714 vec<tree> vec_tmp = vNULL;
4715
4716 vec_tmp.create (vec_oprnds0->length ());
4717 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4718 {
4719 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4720
4721 gcc_assert (op_type == binary_op);
4722 vop1 = (*vec_oprnds1)[i];
4723
4724 /* Widen the first vector input. */
4725 out_type = TREE_TYPE (vec_dest);
4726 new_tmp1 = make_ssa_name (out_type);
4727 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4728 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4729 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4730 {
4731 /* Widen the second vector input. */
4732 new_tmp2 = make_ssa_name (out_type);
4733 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4735 /* Perform the operation with both vector inputs widened. */
4736 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4737 }
4738 else
4739 {
4740 /* Perform the operation with the single vector input widened. */
4741 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4742 }
4743
4744 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4745 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4747
4748 /* Store the results for the next step. */
4749 vec_tmp.quick_push (new_tmp3);
4750 }
4751
4752 vec_oprnds0->release ();
4753 *vec_oprnds0 = vec_tmp;
4754 }
4755
4756
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
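/* For example (a sketch): a widening conversion such as

x_1 = (long int) a_2;

vectorized with V4SI inputs and V2DI outputs is implemented by
unpacking each V4SI vector into its low and high halves, e.g. with
VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR, giving two V2DI vectors
per input vector.  */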
4761
4762 static bool
4763 vectorizable_conversion (vec_info *vinfo,
4764 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4765 gimple **vec_stmt, slp_tree slp_node,
4766 stmt_vector_for_cost *cost_vec)
4767 {
4768 tree vec_dest;
4769 tree scalar_dest;
4770 tree op0, op1 = NULL_TREE;
4771 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4772 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4773 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4774 tree new_temp;
4775 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4776 int ndts = 2;
4777 poly_uint64 nunits_in;
4778 poly_uint64 nunits_out;
4779 tree vectype_out, vectype_in;
4780 int ncopies, i;
4781 tree lhs_type, rhs_type;
4782 enum { NARROW, NONE, WIDEN } modifier;
4783 vec<tree> vec_oprnds0 = vNULL;
4784 vec<tree> vec_oprnds1 = vNULL;
4785 tree vop0;
4786 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4787 int multi_step_cvt = 0;
4788 vec<tree> interm_types = vNULL;
4789 tree intermediate_type, cvt_type = NULL_TREE;
4790 int op_type;
4791 unsigned short fltsz;
4792
4793 /* Is STMT a vectorizable conversion? */
4794
4795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4796 return false;
4797
4798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4799 && ! vec_stmt)
4800 return false;
4801
4802 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4803 if (!stmt)
4804 return false;
4805
4806 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4807 return false;
4808
4809 code = gimple_assign_rhs_code (stmt);
4810 if (!CONVERT_EXPR_CODE_P (code)
4811 && code != FIX_TRUNC_EXPR
4812 && code != FLOAT_EXPR
4813 && code != WIDEN_PLUS_EXPR
4814 && code != WIDEN_MINUS_EXPR
4815 && code != WIDEN_MULT_EXPR
4816 && code != WIDEN_LSHIFT_EXPR)
4817 return false;
4818
4819 bool widen_arith = (code == WIDEN_PLUS_EXPR
4820 || code == WIDEN_MINUS_EXPR
4821 || code == WIDEN_MULT_EXPR
4822 || code == WIDEN_LSHIFT_EXPR);
4823 op_type = TREE_CODE_LENGTH (code);
4824
4825 /* Check types of lhs and rhs. */
4826 scalar_dest = gimple_assign_lhs (stmt);
4827 lhs_type = TREE_TYPE (scalar_dest);
4828 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4829
4830 /* Check the operands of the operation. */
4831 slp_tree slp_op0, slp_op1 = NULL;
4832 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4833 0, &op0, &slp_op0, &dt[0], &vectype_in))
4834 {
4835 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4837 "use not simple.\n");
4838 return false;
4839 }
4840
4841 rhs_type = TREE_TYPE (op0);
4842 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4843 && !((INTEGRAL_TYPE_P (lhs_type)
4844 && INTEGRAL_TYPE_P (rhs_type))
4845 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4846 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4847 return false;
4848
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && ((INTEGRAL_TYPE_P (lhs_type)
4851 && !type_has_mode_precision_p (lhs_type))
4852 || (INTEGRAL_TYPE_P (rhs_type)
4853 && !type_has_mode_precision_p (rhs_type))))
4854 {
4855 if (dump_enabled_p ())
4856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4857 "type conversion to/from bit-precision unsupported."
4858 "\n");
4859 return false;
4860 }
4861
4862 if (op_type == binary_op)
4863 {
4864 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4865 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4866
4867 op1 = gimple_assign_rhs2 (stmt);
4868 tree vectype1_in;
4869 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4870 &op1, &slp_op1, &dt[1], &vectype1_in))
4871 {
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "use not simple.\n");
4875 return false;
4876 }
4877 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4878 OP1. */
4879 if (!vectype_in)
4880 vectype_in = vectype1_in;
4881 }
4882
4883 /* If op0 is an external or constant def, infer the vector type
4884 from the scalar type. */
4885 if (!vectype_in)
4886 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4887 if (vec_stmt)
4888 gcc_assert (vectype_in);
4889 if (!vectype_in)
4890 {
4891 if (dump_enabled_p ())
4892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4893 "no vectype for scalar type %T\n", rhs_type);
4894
4895 return false;
4896 }
4897
4898 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4899 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4900 {
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "can't convert between boolean and non "
4904 "boolean vectors %T\n", rhs_type);
4905
4906 return false;
4907 }
4908
4909 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 if (known_eq (nunits_out, nunits_in))
4912 if (widen_arith)
4913 modifier = WIDEN;
4914 else
4915 modifier = NONE;
4916 else if (multiple_p (nunits_out, nunits_in))
4917 modifier = NARROW;
4918 else
4919 {
4920 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4921 modifier = WIDEN;
4922 }
4923
4924 /* Multiple types in SLP are handled by creating the appropriate number of
4925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4926 case of SLP. */
4927 if (slp_node)
4928 ncopies = 1;
4929 else if (modifier == NARROW)
4930 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4931 else
4932 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4933
4934 /* Sanity check: make sure that at least one copy of the vectorized stmt
4935 needs to be generated. */
4936 gcc_assert (ncopies >= 1);
4937
4938 bool found_mode = false;
4939 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4940 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4941 opt_scalar_mode rhs_mode_iter;
4942
4943 /* Supportable by target? */
4944 switch (modifier)
4945 {
4946 case NONE:
4947 if (code != FIX_TRUNC_EXPR
4948 && code != FLOAT_EXPR
4949 && !CONVERT_EXPR_CODE_P (code))
4950 return false;
4951 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4952 break;
4953 /* FALLTHRU */
4954 unsupported:
4955 if (dump_enabled_p ())
4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957 "conversion not supported by target.\n");
4958 return false;
4959
4960 case WIDEN:
4961 if (known_eq (nunits_in, nunits_out))
4962 {
4963 if (!supportable_half_widening_operation (code, vectype_out,
4964 vectype_in, &code1))
4965 goto unsupported;
4966 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4967 break;
4968 }
4969 if (supportable_widening_operation (vinfo, code, stmt_info,
4970 vectype_out, vectype_in, &code1,
4971 &code2, &multi_step_cvt,
4972 &interm_types))
4973 {
4974 /* A binary widening operation can only be supported directly by the
4975 target architecture. */
4976 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4977 break;
4978 }
4979
4980 if (code != FLOAT_EXPR
4981 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4982 goto unsupported;
4983
4984 fltsz = GET_MODE_SIZE (lhs_mode);
4985 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4986 {
4987 rhs_mode = rhs_mode_iter.require ();
4988 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4989 break;
4990
4991 cvt_type
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4993 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4994 if (cvt_type == NULL_TREE)
4995 goto unsupported;
4996
4997 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4998 {
4999 if (!supportable_convert_operation (code, vectype_out,
5000 cvt_type, &codecvt1))
5001 goto unsupported;
5002 }
5003 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5004 vectype_out, cvt_type,
5005 &codecvt1, &codecvt2,
5006 &multi_step_cvt,
5007 &interm_types))
5008 continue;
5009 else
5010 gcc_assert (multi_step_cvt == 0);
5011
5012 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5013 cvt_type,
5014 vectype_in, &code1, &code2,
5015 &multi_step_cvt, &interm_types))
5016 {
5017 found_mode = true;
5018 break;
5019 }
5020 }
5021
5022 if (!found_mode)
5023 goto unsupported;
5024
5025 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5026 codecvt2 = ERROR_MARK;
5027 else
5028 {
5029 multi_step_cvt++;
5030 interm_types.safe_push (cvt_type);
5031 cvt_type = NULL_TREE;
5032 }
5033 break;
5034
5035 case NARROW:
5036 gcc_assert (op_type == unary_op);
5037 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5038 &code1, &multi_step_cvt,
5039 &interm_types))
5040 break;
5041
5042 if (code != FIX_TRUNC_EXPR
5043 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5044 goto unsupported;
5045
5046 cvt_type
5047 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5048 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5049 if (cvt_type == NULL_TREE)
5050 goto unsupported;
5051 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5052 &codecvt1))
5053 goto unsupported;
5054 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5055 &code1, &multi_step_cvt,
5056 &interm_types))
5057 break;
5058 goto unsupported;
5059
5060 default:
5061 gcc_unreachable ();
5062 }
5063
5064 if (!vec_stmt) /* transformation not required. */
5065 {
5066 if (slp_node
5067 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5068 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5069 {
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "incompatible vector types for invariants\n");
5073 return false;
5074 }
5075 DUMP_VECT_SCOPE ("vectorizable_conversion");
5076 if (modifier == NONE)
5077 {
5078 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5079 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5080 cost_vec);
5081 }
5082 else if (modifier == NARROW)
5083 {
5084 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5085 /* The final packing step produces one vector result per copy. */
5086 unsigned int nvectors
5087 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5088 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5089 multi_step_cvt, cost_vec,
5090 widen_arith);
5091 }
5092 else
5093 {
5094 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5095 /* The initial unpacking step produces two vector results
5096 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5097 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5098 unsigned int nvectors
5099 = (slp_node
5100 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5101 : ncopies * 2);
5102 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5103 multi_step_cvt, cost_vec,
5104 widen_arith);
5105 }
5106 interm_types.release ();
5107 return true;
5108 }
5109
5110 /* Transform. */
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "transform conversion. ncopies = %d.\n", ncopies);
5114
5115 if (op_type == binary_op)
5116 {
5117 if (CONSTANT_CLASS_P (op0))
5118 op0 = fold_convert (TREE_TYPE (op1), op0);
5119 else if (CONSTANT_CLASS_P (op1))
5120 op1 = fold_convert (TREE_TYPE (op0), op1);
5121 }
5122
5123 /* In case of multi-step conversion, we first generate conversion operations
5124 to the intermediate types, and then from those types to the final one.
5125 We create vector destinations for the intermediate type (TYPES) received
5126 from supportable_*_operation, and store them in the correct order
5127 for future use in vect_create_vectorized_*_stmts (). */
5128 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5129 vec_dest = vect_create_destination_var (scalar_dest,
5130 (cvt_type && modifier == WIDEN)
5131 ? cvt_type : vectype_out);
5132 vec_dsts.quick_push (vec_dest);
5133
5134 if (multi_step_cvt)
5135 {
5136 for (i = interm_types.length () - 1;
5137 interm_types.iterate (i, &intermediate_type); i--)
5138 {
5139 vec_dest = vect_create_destination_var (scalar_dest,
5140 intermediate_type);
5141 vec_dsts.quick_push (vec_dest);
5142 }
5143 }
5144
5145 if (cvt_type)
5146 vec_dest = vect_create_destination_var (scalar_dest,
5147 modifier == WIDEN
5148 ? vectype_out : cvt_type);
5149
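/* Editor's note (added comment, not in the original source): for the
   non-SLP NARROW case each vector result consumes 2 * 2^MULTI_STEP_CVT
   input vectors, which is what the NINPUTS factor below accounts for;
   WIDEN produces several results from each input instead, so NINPUTS
   stays 1.  */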
5150 int ninputs = 1;
5151 if (!slp_node)
5152 {
5153 if (modifier == WIDEN)
5154 ;
5155 else if (modifier == NARROW)
5156 {
5157 if (multi_step_cvt)
5158 ninputs = vect_pow2 (multi_step_cvt);
5159 ninputs *= 2;
5160 }
5161 }
5162
5163 switch (modifier)
5164 {
5165 case NONE:
5166 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5167 op0, &vec_oprnds0);
5168 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5169 {
5170 /* Arguments are ready. Create the new vector stmt. */
5171 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5172 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5173 new_temp = make_ssa_name (vec_dest, new_stmt);
5174 gimple_assign_set_lhs (new_stmt, new_temp);
5175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5176
5177 if (slp_node)
5178 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5179 else
5180 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5181 }
5182 break;
5183
5184 case WIDEN:
5185 /* In case the vectorization factor (VF) is bigger than the number
5186 of elements that we can fit in a vectype (nunits), we have to
5187 generate more than one vector stmt, i.e. we need to "unroll"
5188 the vector stmt by a factor of VF/nunits. */
5189 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5190 op0, &vec_oprnds0,
5191 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5192 &vec_oprnds1);
5193 if (code == WIDEN_LSHIFT_EXPR)
5194 {
5195 int oprnds_size = vec_oprnds0.length ();
5196 vec_oprnds1.create (oprnds_size);
5197 for (i = 0; i < oprnds_size; ++i)
5198 vec_oprnds1.quick_push (op1);
5199 }
5200 /* Arguments are ready. Create the new vector stmts. */
5201 for (i = multi_step_cvt; i >= 0; i--)
5202 {
5203 tree this_dest = vec_dsts[i];
5204 enum tree_code c1 = code1, c2 = code2;
5205 if (i == 0 && codecvt2 != ERROR_MARK)
5206 {
5207 c1 = codecvt1;
5208 c2 = codecvt2;
5209 }
5210 if (known_eq (nunits_out, nunits_in))
5211 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5212 &vec_oprnds1, stmt_info,
5213 this_dest, gsi,
5214 c1, op_type);
5215 else
5216 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5217 &vec_oprnds1, stmt_info,
5218 this_dest, gsi,
5219 c1, c2, op_type);
5220 }
5221
5222 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5223 {
5224 gimple *new_stmt;
5225 if (cvt_type)
5226 {
5227 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5228 new_temp = make_ssa_name (vec_dest);
5229 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5230 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5231 }
5232 else
5233 new_stmt = SSA_NAME_DEF_STMT (vop0);
5234
5235 if (slp_node)
5236 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5237 else
5238 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5239 }
5240 break;
5241
5242 case NARROW:
5243 /* In case the vectorization factor (VF) is bigger than the number
5244 of elements that we can fit in a vectype (nunits), we have to
5245 generate more than one vector stmt, i.e. we need to "unroll"
5246 the vector stmt by a factor of VF/nunits. */
5247 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5248 op0, &vec_oprnds0);
5249 /* Arguments are ready. Create the new vector stmts. */
5250 if (cvt_type)
5251 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 {
5253 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5254 new_temp = make_ssa_name (vec_dest);
5255 gassign *new_stmt
5256 = gimple_build_assign (new_temp, codecvt1, vop0);
5257 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5258 vec_oprnds0[i] = new_temp;
5259 }
5260
5261 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5262 multi_step_cvt,
5263 stmt_info, vec_dsts, gsi,
5264 slp_node, code1);
5265 break;
5266 }
5267 if (!slp_node)
5268 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5269
5270 vec_oprnds0.release ();
5271 vec_oprnds1.release ();
5272 interm_types.release ();
5273
5274 return true;
5275 }
5276
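/* Editor's note: an illustrative sketch, not part of the original source.
   The three conversion shapes handled above, written as scalar C loops;
   the vector types in the comments assume 128-bit vectors.

     void f_none (float *r, int *a, int n)     // NONE: int -> float,
     {                                         // V4SI -> V4SF, one vector
       for (int i = 0; i < n; i++)             // stmt per copy.
         r[i] = (float) a[i];
     }

     void f_widen (int *r, short *a, int n)    // WIDEN: short -> int, one
     {                                         // V8HI unpacks into two
       for (int i = 0; i < n; i++)             // V4SI results per copy.
         r[i] = (int) a[i];
     }

     void f_narrow (short *r, int *a, int n)   // NARROW: int -> short, two
     {                                         // V4SI inputs pack into one
       for (int i = 0; i < n; i++)             // V8HI result.
         r[i] = (short) a[i];
     }

   A conversion that has to change the element size by more than one step
   (e.g. short -> double) is what MULTI_STEP_CVT and INTERM_TYPES above
   describe.  */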
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
5280
5281 bool
5282 vect_nop_conversion_p (stmt_vec_info stmt_info)
5283 {
5284 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5285 if (!stmt)
5286 return false;
5287
5288 tree lhs = gimple_assign_lhs (stmt);
5289 tree_code code = gimple_assign_rhs_code (stmt);
5290 tree rhs = gimple_assign_rhs1 (stmt);
5291
5292 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5293 return true;
5294
5295 if (CONVERT_EXPR_CODE_P (code))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5297
5298 return false;
5299 }
5300
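/* Editor's note: an illustrative example, not part of the original source.
   vect_nop_conversion_p holds e.g. for

     unsigned int u = (unsigned int) s;  // int -> unsigned int: same
                                         // precision, tree_nop_conversion_p
     x = VIEW_CONVERT_EXPR <T> (y);      // pure reinterpretation

   since neither the scalar nor the vector form generates any code.  */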
5301 /* Function vectorizable_assignment.
5302
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
5307
5308 static bool
5309 vectorizable_assignment (vec_info *vinfo,
5310 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5311 gimple **vec_stmt, slp_tree slp_node,
5312 stmt_vector_for_cost *cost_vec)
5313 {
5314 tree vec_dest;
5315 tree scalar_dest;
5316 tree op;
5317 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5318 tree new_temp;
5319 enum vect_def_type dt[1] = {vect_unknown_def_type};
5320 int ndts = 1;
5321 int ncopies;
5322 int i;
5323 vec<tree> vec_oprnds = vNULL;
5324 tree vop;
5325 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5326 enum tree_code code;
5327 tree vectype_in;
5328
5329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5330 return false;
5331
5332 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5333 && ! vec_stmt)
5334 return false;
5335
5336 /* Is vectorizable assignment? */
5337 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5338 if (!stmt)
5339 return false;
5340
5341 scalar_dest = gimple_assign_lhs (stmt);
5342 if (TREE_CODE (scalar_dest) != SSA_NAME)
5343 return false;
5344
5345 if (STMT_VINFO_DATA_REF (stmt_info))
5346 return false;
5347
5348 code = gimple_assign_rhs_code (stmt);
5349 if (!(gimple_assign_single_p (stmt)
5350 || code == PAREN_EXPR
5351 || CONVERT_EXPR_CODE_P (code)))
5352 return false;
5353
5354 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5355 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5356
5357 /* Multiple types in SLP are handled by creating the appropriate number of
5358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5359 case of SLP. */
5360 if (slp_node)
5361 ncopies = 1;
5362 else
5363 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5364
5365 gcc_assert (ncopies >= 1);
5366
5367 slp_tree slp_op;
5368 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5369 &dt[0], &vectype_in))
5370 {
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "use not simple.\n");
5374 return false;
5375 }
5376 if (!vectype_in)
5377 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5378
5379 /* We can handle NOP_EXPR conversions that do not change the number
5380 of elements or the vector size. */
5381 if ((CONVERT_EXPR_CODE_P (code)
5382 || code == VIEW_CONVERT_EXPR)
5383 && (!vectype_in
5384 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5385 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5386 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5387 return false;
5388
5389 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5390 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5391 {
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 "can't convert between boolean and non "
5395 "boolean vectors %T\n", TREE_TYPE (op));
5396
5397 return false;
5398 }
5399
5400 /* We do not handle bit-precision changes. */
5401 if ((CONVERT_EXPR_CODE_P (code)
5402 || code == VIEW_CONVERT_EXPR)
5403 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5404 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5405 || !type_has_mode_precision_p (TREE_TYPE (op)))
5406 /* But a conversion that does not change the bit-pattern is ok. */
5407 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5408 > TYPE_PRECISION (TREE_TYPE (op)))
5409 && TYPE_UNSIGNED (TREE_TYPE (op))))
5410 {
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413 "type conversion to/from bit-precision "
5414 "unsupported.\n");
5415 return false;
5416 }
5417
5418 if (!vec_stmt) /* transformation not required. */
5419 {
5420 if (slp_node
5421 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5422 {
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "incompatible vector types for invariants\n");
5426 return false;
5427 }
5428 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info))
5431 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5432 cost_vec);
5433 return true;
5434 }
5435
5436 /* Transform. */
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5439
5440 /* Handle def. */
5441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5442
5443 /* Handle use. */
5444 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5445
5446 /* Arguments are ready. Create the new vector stmt. */
5447 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5448 {
5449 if (CONVERT_EXPR_CODE_P (code)
5450 || code == VIEW_CONVERT_EXPR)
5451 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453 new_temp = make_ssa_name (vec_dest, new_stmt);
5454 gimple_assign_set_lhs (new_stmt, new_temp);
5455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5456 if (slp_node)
5457 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5458 else
5459 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5460 }
5461 if (!slp_node)
5462 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5463
5464 vec_oprnds.release ();
5465 return true;
5466 }
5467
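/* Editor's note: an illustrative sketch, not part of the original source.
   For a plain copy or a size-preserving conversion such as

     void copy (unsigned int *r, int *a, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = (unsigned int) a[i];   // same element size and count
     }

   vectorizable_assignment emits, per copy, a single statement of the form

     vect_r = VIEW_CONVERT_EXPR <vector unsigned int> (vect_a);

   i.e. a reinterpretation of the operand vector that needs no real code.  */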
5468
5469 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5470 either as shift by a scalar or by a vector. */
5471
5472 bool
5473 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5474 {
5475
5476 machine_mode vec_mode;
5477 optab optab;
5478 int icode;
5479 tree vectype;
5480
5481 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5482 if (!vectype)
5483 return false;
5484
5485 optab = optab_for_tree_code (code, vectype, optab_scalar);
5486 if (!optab
5487 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5488 {
5489 optab = optab_for_tree_code (code, vectype, optab_vector);
5490 if (!optab
5491 || (optab_handler (optab, TYPE_MODE (vectype))
5492 == CODE_FOR_nothing))
5493 return false;
5494 }
5495
5496 vec_mode = TYPE_MODE (vectype);
5497 icode = (int) optab_handler (optab, vec_mode);
5498 if (icode == CODE_FOR_nothing)
5499 return false;
5500
5501 return true;
5502 }
5503
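/* Editor's note: a minimal usage sketch, not part of the original source;
   the variable names are assumptions.  A pattern that wants to introduce
   a shift by a constant can ask first:

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, itype))
       ...  // either the vector/scalar or the vector/vector shift optab
            // is implemented for the vector type of ITYPE.  */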
5504
5505 /* Function vectorizable_shift.
5506
5507 Check if STMT_INFO performs a shift operation that can be vectorized.
5508 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5509 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5510 Return true if STMT_INFO is vectorizable in this way. */
5511
5512 static bool
5513 vectorizable_shift (vec_info *vinfo,
5514 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5515 gimple **vec_stmt, slp_tree slp_node,
5516 stmt_vector_for_cost *cost_vec)
5517 {
5518 tree vec_dest;
5519 tree scalar_dest;
5520 tree op0, op1 = NULL;
5521 tree vec_oprnd1 = NULL_TREE;
5522 tree vectype;
5523 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5524 enum tree_code code;
5525 machine_mode vec_mode;
5526 tree new_temp;
5527 optab optab;
5528 int icode;
5529 machine_mode optab_op2_mode;
5530 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5531 int ndts = 2;
5532 poly_uint64 nunits_in;
5533 poly_uint64 nunits_out;
5534 tree vectype_out;
5535 tree op1_vectype;
5536 int ncopies;
5537 int i;
5538 vec<tree> vec_oprnds0 = vNULL;
5539 vec<tree> vec_oprnds1 = vNULL;
5540 tree vop0, vop1;
5541 unsigned int k;
5542 bool scalar_shift_arg = true;
5543 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5544 bool incompatible_op1_vectype_p = false;
5545
5546 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5547 return false;
5548
5549 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5550 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5551 && ! vec_stmt)
5552 return false;
5553
5554 /* Is STMT a vectorizable binary/unary operation? */
5555 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5556 if (!stmt)
5557 return false;
5558
5559 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5560 return false;
5561
5562 code = gimple_assign_rhs_code (stmt);
5563
5564 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5565 || code == RROTATE_EXPR))
5566 return false;
5567
5568 scalar_dest = gimple_assign_lhs (stmt);
5569 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5570 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5571 {
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "bit-precision shifts not supported.\n");
5575 return false;
5576 }
5577
5578 slp_tree slp_op0;
5579 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5580 0, &op0, &slp_op0, &dt[0], &vectype))
5581 {
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5584 "use not simple.\n");
5585 return false;
5586 }
5587 /* If op0 is an external or constant def, infer the vector type
5588 from the scalar type. */
5589 if (!vectype)
5590 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5591 if (vec_stmt)
5592 gcc_assert (vectype);
5593 if (!vectype)
5594 {
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 "no vectype for scalar type\n");
5598 return false;
5599 }
5600
5601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5603 if (maybe_ne (nunits_out, nunits_in))
5604 return false;
5605
5606 stmt_vec_info op1_def_stmt_info;
5607 slp_tree slp_op1;
5608 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5609 &dt[1], &op1_vectype, &op1_def_stmt_info))
5610 {
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613 "use not simple.\n");
5614 return false;
5615 }
5616
5617 /* Multiple types in SLP are handled by creating the appropriate number of
5618 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5619 case of SLP. */
5620 if (slp_node)
5621 ncopies = 1;
5622 else
5623 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5624
5625 gcc_assert (ncopies >= 1);
5626
5627 /* Determine whether the shift amount is a vector or a scalar. If the
5628 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5629
5630 if ((dt[1] == vect_internal_def
5631 || dt[1] == vect_induction_def
5632 || dt[1] == vect_nested_cycle)
5633 && !slp_node)
5634 scalar_shift_arg = false;
5635 else if (dt[1] == vect_constant_def
5636 || dt[1] == vect_external_def
5637 || dt[1] == vect_internal_def)
5638 {
5639 /* In SLP we need to check whether the shift count is the same
5640 in all the scalar stmts; in loops, if it is a constant or
5641 invariant, it is always a scalar shift. */
5642 if (slp_node)
5643 {
5644 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5645 stmt_vec_info slpstmt_info;
5646
5647 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5648 {
5649 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5650 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5651 scalar_shift_arg = false;
5652 }
5653
5654 /* For internal SLP defs we have to make sure we see scalar stmts
5655 for all vector elements.
5656 ??? For different vectors we could resort to a different
5657 scalar shift operand but code-generation below simply always
5658 takes the first. */
5659 if (dt[1] == vect_internal_def
5660 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5661 stmts.length ()))
5662 scalar_shift_arg = false;
5663 }
5664
5665 /* If the shift amount is computed by a pattern stmt we cannot
5666 use the scalar amount directly; give up and use a vector
5667 shift instead. */
5668 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5669 scalar_shift_arg = false;
5670 }
5671 else
5672 {
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675 "operand mode requires invariant argument.\n");
5676 return false;
5677 }
5678
5679 /* Vector shifted by vector. */
5680 bool was_scalar_shift_arg = scalar_shift_arg;
5681 if (!scalar_shift_arg)
5682 {
5683 optab = optab_for_tree_code (code, vectype, optab_vector);
5684 if (dump_enabled_p ())
5685 dump_printf_loc (MSG_NOTE, vect_location,
5686 "vector/vector shift/rotate found.\n");
5687
5688 if (!op1_vectype)
5689 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5690 slp_op1);
5691 incompatible_op1_vectype_p
5692 = (op1_vectype == NULL_TREE
5693 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5694 TYPE_VECTOR_SUBPARTS (vectype))
5695 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5696 if (incompatible_op1_vectype_p
5697 && (!slp_node
5698 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5699 || slp_op1->refcnt != 1))
5700 {
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "unusable type for last operand in"
5704 " vector/vector shift/rotate.\n");
5705 return false;
5706 }
5707 }
5708 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5709 whether it has a vector-shifted-by-vector insn. */
5710 else
5711 {
5712 optab = optab_for_tree_code (code, vectype, optab_scalar);
5713 if (optab
5714 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5715 {
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE, vect_location,
5718 "vector/scalar shift/rotate found.\n");
5719 }
5720 else
5721 {
5722 optab = optab_for_tree_code (code, vectype, optab_vector);
5723 if (optab
5724 && (optab_handler (optab, TYPE_MODE (vectype))
5725 != CODE_FOR_nothing))
5726 {
5727 scalar_shift_arg = false;
5728
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE, vect_location,
5731 "vector/vector shift/rotate found.\n");
5732
5733 if (!op1_vectype)
5734 op1_vectype = get_vectype_for_scalar_type (vinfo,
5735 TREE_TYPE (op1),
5736 slp_op1);
5737
5738 /* Unlike the other binary operators, shifts/rotates have
5739 an rhs of type int rather than the same type as the lhs,
5740 so make sure the scalar is the right type if we are
5741 dealing with vectors of long long/long/short/char. */
5742 incompatible_op1_vectype_p
5743 = (!op1_vectype
5744 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5745 TREE_TYPE (op1)));
5746 if (incompatible_op1_vectype_p
5747 && dt[1] == vect_internal_def)
5748 {
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 "unusable type for last operand in"
5752 " vector/vector shift/rotate.\n");
5753 return false;
5754 }
5755 }
5756 }
5757 }
5758
5759 /* Supportable by target? */
5760 if (!optab)
5761 {
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764 "no optab.\n");
5765 return false;
5766 }
5767 vec_mode = TYPE_MODE (vectype);
5768 icode = (int) optab_handler (optab, vec_mode);
5769 if (icode == CODE_FOR_nothing)
5770 {
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "op not supported by target.\n");
5774 return false;
5775 }
5776 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5777 if (vect_emulated_vector_p (vectype))
5778 return false;
5779
5780 if (!vec_stmt) /* transformation not required. */
5781 {
5782 if (slp_node
5783 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5784 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5785 && (!incompatible_op1_vectype_p
5786 || dt[1] == vect_constant_def)
5787 && !vect_maybe_update_slp_op_vectype
5788 (slp_op1,
5789 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5790 {
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "incompatible vector types for invariants\n");
5794 return false;
5795 }
5796 /* Now adjust the constant shift amount in place. */
5797 if (slp_node
5798 && incompatible_op1_vectype_p
5799 && dt[1] == vect_constant_def)
5800 {
5801 for (unsigned i = 0;
5802 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5803 {
5804 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5805 = fold_convert (TREE_TYPE (vectype),
5806 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5807 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5808 == INTEGER_CST));
5809 }
5810 }
5811 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5812 DUMP_VECT_SCOPE ("vectorizable_shift");
5813 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5814 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5815 return true;
5816 }
5817
5818 /* Transform. */
5819
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_NOTE, vect_location,
5822 "transform binary/unary operation.\n");
5823
5824 if (incompatible_op1_vectype_p && !slp_node)
5825 {
5826 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5827 op1 = fold_convert (TREE_TYPE (vectype), op1);
5828 if (dt[1] != vect_constant_def)
5829 op1 = vect_init_vector (vinfo, stmt_info, op1,
5830 TREE_TYPE (vectype), NULL);
5831 }
5832
5833 /* Handle def. */
5834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5835
5836 if (scalar_shift_arg && dt[1] != vect_internal_def)
5837 {
5838 /* Vector shl and shr insn patterns can be defined with scalar
5839 operand 2 (shift operand). In this case, use constant or loop
5840 invariant op1 directly, without extending it to vector mode
5841 first. */
5842 optab_op2_mode = insn_data[icode].operand[2].mode;
5843 if (!VECTOR_MODE_P (optab_op2_mode))
5844 {
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_NOTE, vect_location,
5847 "operand 1 using scalar mode.\n");
5848 vec_oprnd1 = op1;
5849 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5850 vec_oprnds1.quick_push (vec_oprnd1);
5851 /* Store vec_oprnd1 for every vector stmt to be created.
5852 We check during the analysis that all the shift arguments
5853 are the same.
5854 TODO: Allow different constants for different vector
5855 stmts generated for an SLP instance. */
5856 for (k = 0;
5857 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5858 vec_oprnds1.quick_push (vec_oprnd1);
5859 }
5860 }
5861 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5862 {
5863 if (was_scalar_shift_arg)
5864 {
5865 /* If the argument was the same in all lanes create
5866 the correctly typed vector shift amount directly. */
5867 op1 = fold_convert (TREE_TYPE (vectype), op1);
5868 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5869 !loop_vinfo ? gsi : NULL);
5870 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5871 !loop_vinfo ? gsi : NULL);
5872 vec_oprnds1.create (slp_node->vec_stmts_size);
5873 for (k = 0; k < slp_node->vec_stmts_size; k++)
5874 vec_oprnds1.quick_push (vec_oprnd1);
5875 }
5876 else if (dt[1] == vect_constant_def)
5877 /* The constant shift amount has been adjusted in place. */
5878 ;
5879 else
5880 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5881 }
5882
5883 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5884 (a special case for certain kinds of vector shifts); otherwise,
5885 operand 1 should be of a vector type (the usual case). */
5886 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5887 op0, &vec_oprnds0,
5888 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5889
5890 /* Arguments are ready. Create the new vector stmt. */
5891 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5892 {
5893 /* For internal defs where we need to use a scalar shift arg,
5894 extract the first lane. */
5895 if (scalar_shift_arg && dt[1] == vect_internal_def)
5896 {
5897 vop1 = vec_oprnds1[0];
5898 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5899 gassign *new_stmt
5900 = gimple_build_assign (new_temp,
5901 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5902 vop1,
5903 TYPE_SIZE (TREE_TYPE (new_temp)),
5904 bitsize_zero_node));
5905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5906 vop1 = new_temp;
5907 }
5908 else
5909 vop1 = vec_oprnds1[i];
5910 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5911 new_temp = make_ssa_name (vec_dest, new_stmt);
5912 gimple_assign_set_lhs (new_stmt, new_temp);
5913 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5914 if (slp_node)
5915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5916 else
5917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5918 }
5919
5920 if (!slp_node)
5921 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5922
5923 vec_oprnds0.release ();
5924 vec_oprnds1.release ();
5925
5926 return true;
5927 }
5928
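/* Editor's note: illustrative scalar loops, not part of the original
   source, for the two shift-argument shapes analysed above:

     for (int i = 0; i < n; i++)
       r[i] = a[i] << 3;      // invariant amount: vector/scalar optab,
                              // SCALAR_SHIFT_ARG stays true.

     for (int i = 0; i < n; i++)
       r[i] = a[i] << b[i];   // per-element amount: vector/vector optab,
                              // SCALAR_SHIFT_ARG is false.

   If only the vector/vector form exists, the invariant amount is
   broadcast into a vector first (the optab fallback above).  */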
5929
5930 /* Function vectorizable_operation.
5931
5932 Check if STMT_INFO performs a binary, unary or ternary operation that can
5933 be vectorized.
5934 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5935 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5936 Return true if STMT_INFO is vectorizable in this way. */
5937
5938 static bool
5939 vectorizable_operation (vec_info *vinfo,
5940 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5941 gimple **vec_stmt, slp_tree slp_node,
5942 stmt_vector_for_cost *cost_vec)
5943 {
5944 tree vec_dest;
5945 tree scalar_dest;
5946 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5947 tree vectype;
5948 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5949 enum tree_code code, orig_code;
5950 machine_mode vec_mode;
5951 tree new_temp;
5952 int op_type;
5953 optab optab;
5954 bool target_support_p;
5955 enum vect_def_type dt[3]
5956 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5957 int ndts = 3;
5958 poly_uint64 nunits_in;
5959 poly_uint64 nunits_out;
5960 tree vectype_out;
5961 int ncopies, vec_num;
5962 int i;
5963 vec<tree> vec_oprnds0 = vNULL;
5964 vec<tree> vec_oprnds1 = vNULL;
5965 vec<tree> vec_oprnds2 = vNULL;
5966 tree vop0, vop1, vop2;
5967 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5968
5969 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5970 return false;
5971
5972 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5973 && ! vec_stmt)
5974 return false;
5975
5976 /* Is STMT a vectorizable unary/binary/ternary operation? */
5977 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5978 if (!stmt)
5979 return false;
5980
5981 /* Loads and stores are handled in vectorizable_{load,store}. */
5982 if (STMT_VINFO_DATA_REF (stmt_info))
5983 return false;
5984
5985 orig_code = code = gimple_assign_rhs_code (stmt);
5986
5987 /* Shifts are handled in vectorizable_shift. */
5988 if (code == LSHIFT_EXPR
5989 || code == RSHIFT_EXPR
5990 || code == LROTATE_EXPR
5991 || code == RROTATE_EXPR)
5992 return false;
5993
5994 /* Comparisons are handled in vectorizable_comparison. */
5995 if (TREE_CODE_CLASS (code) == tcc_comparison)
5996 return false;
5997
5998 /* Conditions are handled in vectorizable_condition. */
5999 if (code == COND_EXPR)
6000 return false;
6001
6002 /* For pointer addition and subtraction, we should use the normal
6003 plus and minus for the vector operation. */
6004 if (code == POINTER_PLUS_EXPR)
6005 code = PLUS_EXPR;
6006 if (code == POINTER_DIFF_EXPR)
6007 code = MINUS_EXPR;
6008
6009 /* Support only unary, binary and ternary operations. */
6010 op_type = TREE_CODE_LENGTH (code);
6011 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6012 {
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "num. args = %d (not unary/binary/ternary op).\n",
6016 op_type);
6017 return false;
6018 }
6019
6020 scalar_dest = gimple_assign_lhs (stmt);
6021 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6022
6023 /* Most operations cannot handle bit-precision types without extra
6024 truncations. */
6025 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6026 if (!mask_op_p
6027 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6028 /* The exceptions are bitwise binary operations. */
6029 && code != BIT_IOR_EXPR
6030 && code != BIT_XOR_EXPR
6031 && code != BIT_AND_EXPR)
6032 {
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "bit-precision arithmetic not supported.\n");
6036 return false;
6037 }
6038
6039 slp_tree slp_op0;
6040 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6041 0, &op0, &slp_op0, &dt[0], &vectype))
6042 {
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "use not simple.\n");
6046 return false;
6047 }
6048 /* If op0 is an external or constant def, infer the vector type
6049 from the scalar type. */
6050 if (!vectype)
6051 {
6052 /* For a boolean type we cannot determine the vectype from an
6053 invariant value (we don't know whether it is a vector
6054 of booleans or a vector of integers). Use the output
6055 vectype because operations on booleans don't change
6056 the type. */
6057 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6058 {
6059 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6060 {
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "not supported operation on bool value.\n");
6064 return false;
6065 }
6066 vectype = vectype_out;
6067 }
6068 else
6069 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6070 slp_node);
6071 }
6072 if (vec_stmt)
6073 gcc_assert (vectype);
6074 if (!vectype)
6075 {
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 "no vectype for scalar type %T\n",
6079 TREE_TYPE (op0));
6080
6081 return false;
6082 }
6083
6084 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6085 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6086 if (maybe_ne (nunits_out, nunits_in))
6087 return false;
6088
6089 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6090 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6091 if (op_type == binary_op || op_type == ternary_op)
6092 {
6093 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6094 1, &op1, &slp_op1, &dt[1], &vectype2))
6095 {
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098 "use not simple.\n");
6099 return false;
6100 }
6101 }
6102 if (op_type == ternary_op)
6103 {
6104 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6105 2, &op2, &slp_op2, &dt[2], &vectype3))
6106 {
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109 "use not simple.\n");
6110 return false;
6111 }
6112 }
6113
6114 /* Multiple types in SLP are handled by creating the appropriate number of
6115 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6116 case of SLP. */
6117 if (slp_node)
6118 {
6119 ncopies = 1;
6120 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6121 }
6122 else
6123 {
6124 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6125 vec_num = 1;
6126 }
6127
6128 gcc_assert (ncopies >= 1);
6129
6130 /* Reject attempts to combine mask types with nonmask types, e.g. if
6131 we have an AND between a (nonmask) boolean loaded from memory and
6132 a (mask) boolean result of a comparison.
6133
6134 TODO: We could easily fix these cases up using pattern statements. */
6135 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6136 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6137 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6138 {
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6141 "mixed mask and nonmask vector types\n");
6142 return false;
6143 }
6144
6145 /* Supportable by target? */
6146
6147 vec_mode = TYPE_MODE (vectype);
6148 if (code == MULT_HIGHPART_EXPR)
6149 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6150 else
6151 {
6152 optab = optab_for_tree_code (code, vectype, optab_default);
6153 if (!optab)
6154 {
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "no optab.\n");
6158 return false;
6159 }
6160 target_support_p = (optab_handler (optab, vec_mode)
6161 != CODE_FOR_nothing);
6162 }
6163
6164 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6165 if (!target_support_p)
6166 {
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "op not supported by target.\n");
6170 /* Check only during analysis. */
6171 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6172 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6173 return false;
6174 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_NOTE, vect_location,
6176 "proceeding using word mode.\n");
6177 using_emulated_vectors_p = true;
6178 }
6179
6180 if (using_emulated_vectors_p
6181 && !vect_can_vectorize_without_simd_p (code))
6182 {
6183 if (dump_enabled_p ())
6184 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6185 return false;
6186 }
6187
6188 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6189 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6190 internal_fn cond_fn = get_conditional_internal_fn (code);
6191
6192 if (!vec_stmt) /* transformation not required. */
6193 {
6194 /* If this operation is part of a reduction, a fully-masked loop
6195 should only change the active lanes of the reduction chain,
6196 keeping the inactive lanes as-is. */
6197 if (loop_vinfo
6198 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6199 && reduc_idx >= 0)
6200 {
6201 if (cond_fn == IFN_LAST
6202 || !direct_internal_fn_supported_p (cond_fn, vectype,
6203 OPTIMIZE_FOR_SPEED))
6204 {
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "can't use a fully-masked loop because no"
6208 " conditional operation is available.\n");
6209 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6210 }
6211 else
6212 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6213 vectype, NULL);
6214 }
6215
6216 /* Put types on constant and invariant SLP children. */
6217 if (slp_node
6218 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6219 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6220 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6221 {
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 "incompatible vector types for invariants\n");
6225 return false;
6226 }
6227
6228 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6229 DUMP_VECT_SCOPE ("vectorizable_operation");
6230 vect_model_simple_cost (vinfo, stmt_info,
6231 ncopies, dt, ndts, slp_node, cost_vec);
6232 if (using_emulated_vectors_p)
6233 {
6234 /* The above vect_model_simple_cost call handles constants
6235 in the prologue and (mis-)costs one of the stmts as
6236 a vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6237 for the actual lowering that will be applied. */
6238 unsigned n
6239 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6240 switch (code)
6241 {
6242 case PLUS_EXPR:
6243 n *= 5;
6244 break;
6245 case MINUS_EXPR:
6246 n *= 6;
6247 break;
6248 case NEGATE_EXPR:
6249 n *= 4;
6250 break;
6251 default:;
6252 }
6253 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6254 }
6255 return true;
6256 }
6257
6258 /* Transform. */
6259
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_NOTE, vect_location,
6262 "transform binary/unary operation.\n");
6263
6264 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6265
6266 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6267 vectors with unsigned elements, but the result is signed. So, we
6268 need to compute the MINUS_EXPR into a vectype temporary and
6269 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6270 tree vec_cvt_dest = NULL_TREE;
6271 if (orig_code == POINTER_DIFF_EXPR)
6272 {
6273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6274 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6275 }
6276 /* Handle def. */
6277 else
6278 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6279
6280 /* In case the vectorization factor (VF) is bigger than the number
6281 of elements that we can fit in a vectype (nunits), we have to generate
6282 more than one vector stmt, i.e. we need to "unroll" the
6283 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6284 from one copy of the vector stmt to the next, in the field
6285 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6286 stages to find the correct vector defs to be used when vectorizing
6287 stmts that use the defs of the current stmt. The example below
6288 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6289 we need to create 4 vectorized stmts):
6290
6291 before vectorization:
6292 RELATED_STMT VEC_STMT
6293 S1: x = memref - -
6294 S2: z = x + 1 - -
6295
6296 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6297 there):
6298 RELATED_STMT VEC_STMT
6299 VS1_0: vx0 = memref0 VS1_1 -
6300 VS1_1: vx1 = memref1 VS1_2 -
6301 VS1_2: vx2 = memref2 VS1_3 -
6302 VS1_3: vx3 = memref3 - -
6303 S1: x = load - VS1_0
6304 S2: z = x + 1 - -
6305
6306 step2: vectorize stmt S2 (done here):
6307 To vectorize stmt S2 we first need to find the relevant vector
6308 def for the first operand 'x'. This is, as usual, obtained from
6309 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6310 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6311 relevant vector def 'vx0'. Having found 'vx0' we can generate
6312 the vector stmt VS2_0, and as usual, record it in the
6313 STMT_VINFO_VEC_STMT of stmt S2.
6314 When creating the second copy (VS2_1), we obtain the relevant vector
6315 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6316 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6317 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6318 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6319 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6320 chain of stmts and pointers:
6321 RELATED_STMT VEC_STMT
6322 VS1_0: vx0 = memref0 VS1_1 -
6323 VS1_1: vx1 = memref1 VS1_2 -
6324 VS1_2: vx2 = memref2 VS1_3 -
6325 VS1_3: vx3 = memref3 - -
6326 S1: x = load - VS1_0
6327 VS2_0: vz0 = vx0 + v1 VS2_1 -
6328 VS2_1: vz1 = vx1 + v1 VS2_2 -
6329 VS2_2: vz2 = vx2 + v1 VS2_3 -
6330 VS2_3: vz3 = vx3 + v1 - -
6331 S2: z = x + 1 - VS2_0 */
6332
6333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6334 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6335 /* Arguments are ready. Create the new vector stmt. */
6336 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6337 {
6338 gimple *new_stmt = NULL;
6339 vop1 = ((op_type == binary_op || op_type == ternary_op)
6340 ? vec_oprnds1[i] : NULL_TREE);
6341 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6342 if (masked_loop_p && reduc_idx >= 0)
6343 {
6344 /* Perform the operation on active elements only and take
6345 inactive elements from the reduction chain input. */
6346 gcc_assert (!vop2);
6347 vop2 = reduc_idx == 1 ? vop1 : vop0;
6348 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6349 vectype, i);
6350 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6351 vop0, vop1, vop2);
6352 new_temp = make_ssa_name (vec_dest, call);
6353 gimple_call_set_lhs (call, new_temp);
6354 gimple_call_set_nothrow (call, true);
6355 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6356 new_stmt = call;
6357 }
6358 else
6359 {
6360 tree mask = NULL_TREE;
6361 /* When combining two masks, check if either of them has already been
6362 combined with a loop mask elsewhere; if so, we can mark that the
6363 new combined mask doesn't need to be combined with a loop mask. */
6364 if (masked_loop_p
6365 && code == BIT_AND_EXPR
6366 && VECTOR_BOOLEAN_TYPE_P (vectype))
6367 {
6368 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6369 ncopies}))
6370 {
6371 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6372 vectype, i);
6373
6374 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6375 vop0, gsi);
6376 }
6377
6378 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6379 ncopies }))
6380 {
6381 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6382 vectype, i);
6383
6384 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6385 vop1, gsi);
6386 }
6387 }
6388
6389 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6390 new_temp = make_ssa_name (vec_dest, new_stmt);
6391 gimple_assign_set_lhs (new_stmt, new_temp);
6392 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6393
6394 /* Enter the combined value into the vector cond hash so we don't
6395 AND it with a loop mask again. */
6396 if (mask)
6397 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6398
6399 if (vec_cvt_dest)
6400 {
6401 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6402 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6403 new_temp);
6404 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6405 gimple_assign_set_lhs (new_stmt, new_temp);
6406 vect_finish_stmt_generation (vinfo, stmt_info,
6407 new_stmt, gsi);
6408 }
6409 }
6410 if (slp_node)
6411 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6412 else
6413 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6414 }
6415
6416 if (!slp_node)
6417 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6418
6419 vec_oprnds0.release ();
6420 vec_oprnds1.release ();
6421 vec_oprnds2.release ();
6422
6423 return true;
6424 }
6425
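/* Editor's note: an illustrative sketch, not part of the original source,
   of the fully-masked reduction path above.  For

     double s = 0;
     for (int i = 0; i < n; i++)
       s += a[i];

   vectorized with partial vectors, each vector statement becomes a
   conditional internal function call of the form

     vect_s = .COND_ADD (loop_mask, vect_s, vect_a, vect_s);

   so inactive lanes simply pass the previous value of the reduction
   chain through unchanged.  */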
6426 /* A helper function to ensure data reference DR_INFO's base alignment. */
6427
6428 static void
6429 ensure_base_align (dr_vec_info *dr_info)
6430 {
6431 /* Alignment is only analyzed for the first element of a DR group,
6432 so use that to determine the base alignment we need to enforce. */
6433 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6434 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6435
6436 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6437
6438 if (dr_info->base_misaligned)
6439 {
6440 tree base_decl = dr_info->base_decl;
6441
6442 // We should only be able to increase the alignment of a base object if
6443 // we know what its new alignment should be at compile time.
6444 unsigned HOST_WIDE_INT align_base_to =
6445 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6446
6447 if (decl_in_symtab_p (base_decl))
6448 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6449 else if (DECL_ALIGN (base_decl) < align_base_to)
6450 {
6451 SET_DECL_ALIGN (base_decl, align_base_to);
6452 DECL_USER_ALIGN (base_decl) = 1;
6453 }
6454 dr_info->base_misaligned = false;
6455 }
6456 }
6457
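/* Editor's note (illustrative, not part of the original source): for

     static double buf[1024];   // default alignment of the decl

   ensure_base_align may raise DECL_ALIGN of `buf' to DR_TARGET_ALIGNMENT
   (e.g. 32 bytes for 256-bit vectors) so the vectorized accesses can be
   carried out as aligned loads and stores.  */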
6458
6459 /* Function get_group_alias_ptr_type.
6460
6461 Return the alias type for the group starting at FIRST_STMT_INFO. */
6462
6463 static tree
6464 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6465 {
6466 struct data_reference *first_dr, *next_dr;
6467
6468 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6469 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6470 while (next_stmt_info)
6471 {
6472 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6473 if (get_alias_set (DR_REF (first_dr))
6474 != get_alias_set (DR_REF (next_dr)))
6475 {
6476 if (dump_enabled_p ())
6477 dump_printf_loc (MSG_NOTE, vect_location,
6478 "conflicting alias set types.\n");
6479 return ptr_type_node;
6480 }
6481 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6482 }
6483 return reference_alias_ptr_type (DR_REF (first_dr));
6484 }
6485
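/* Editor's note, not part of the original source: if every member of an
   interleaved group has the same alias set, the precise alias pointer
   type of the first reference is used; a single mismatching member makes
   the whole group fall back to ptr_type_node, i.e. "may alias anything".  */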
6486
6487 /* Function scan_operand_equal_p.
6488
6489 Helper function for check_scan_store. Compare two references
6490 with .GOMP_SIMD_LANE bases. */
6491
6492 static bool
6493 scan_operand_equal_p (tree ref1, tree ref2)
6494 {
6495 tree ref[2] = { ref1, ref2 };
6496 poly_int64 bitsize[2], bitpos[2];
6497 tree offset[2], base[2];
6498 for (int i = 0; i < 2; ++i)
6499 {
6500 machine_mode mode;
6501 int unsignedp, reversep, volatilep = 0;
6502 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6503 &offset[i], &mode, &unsignedp,
6504 &reversep, &volatilep);
6505 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6506 return false;
6507 if (TREE_CODE (base[i]) == MEM_REF
6508 && offset[i] == NULL_TREE
6509 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6510 {
6511 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6512 if (is_gimple_assign (def_stmt)
6513 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6514 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6515 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6516 {
6517 if (maybe_ne (mem_ref_offset (base[i]), 0))
6518 return false;
6519 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6520 offset[i] = gimple_assign_rhs2 (def_stmt);
6521 }
6522 }
6523 }
6524
6525 if (!operand_equal_p (base[0], base[1], 0))
6526 return false;
6527 if (maybe_ne (bitsize[0], bitsize[1]))
6528 return false;
6529 if (offset[0] != offset[1])
6530 {
6531 if (!offset[0] || !offset[1])
6532 return false;
6533 if (!operand_equal_p (offset[0], offset[1], 0))
6534 {
6535 tree step[2];
6536 for (int i = 0; i < 2; ++i)
6537 {
6538 step[i] = integer_one_node;
6539 if (TREE_CODE (offset[i]) == SSA_NAME)
6540 {
6541 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6542 if (is_gimple_assign (def_stmt)
6543 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6544 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6545 == INTEGER_CST))
6546 {
6547 step[i] = gimple_assign_rhs2 (def_stmt);
6548 offset[i] = gimple_assign_rhs1 (def_stmt);
6549 }
6550 }
6551 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6552 {
6553 step[i] = TREE_OPERAND (offset[i], 1);
6554 offset[i] = TREE_OPERAND (offset[i], 0);
6555 }
6556 tree rhs1 = NULL_TREE;
6557 if (TREE_CODE (offset[i]) == SSA_NAME)
6558 {
6559 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6560 if (gimple_assign_cast_p (def_stmt))
6561 rhs1 = gimple_assign_rhs1 (def_stmt);
6562 }
6563 else if (CONVERT_EXPR_P (offset[i]))
6564 rhs1 = TREE_OPERAND (offset[i], 0);
6565 if (rhs1
6566 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6567 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6568 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6569 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6570 offset[i] = rhs1;
6571 }
6572 if (!operand_equal_p (offset[0], offset[1], 0)
6573 || !operand_equal_p (step[0], step[1], 0))
6574 return false;
6575 }
6576 }
6577 return true;
6578 }
6579
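/* Editor's note, not part of the original source: scan_operand_equal_p is
   meant to equate accesses to the same "omp simd array", e.g. the
   D.2042[_20] loads and stores in the pattern shown in check_scan_store
   below, even when one of them has been rewritten into a MEM_REF of
   &D.2042 plus an offset, or the offset has been scaled or cast.  */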
6580
6581 enum scan_store_kind {
6582 /* Normal permutation. */
6583 scan_store_kind_perm,
6584
6585 /* Whole vector left shift permutation with zero init. */
6586 scan_store_kind_lshift_zero,
6587
6588 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6589 scan_store_kind_lshift_cond
6590 };
6591
6592 /* Function scan_store_can_perm_p.
6593 
6594 Verify if we can perform the needed permutations or whole vector shifts.
6595 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6596 USE_WHOLE_VECTOR, if nonnull, is filled with the enum scan_store_kind
6597 operation to perform at each step. */
6598
6599 static int
6600 scan_store_can_perm_p (tree vectype, tree init,
6601 vec<enum scan_store_kind> *use_whole_vector = NULL)
6602 {
6603 enum machine_mode vec_mode = TYPE_MODE (vectype);
6604 unsigned HOST_WIDE_INT nunits;
6605 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6606 return -1;
6607 int units_log2 = exact_log2 (nunits);
6608 if (units_log2 <= 0)
6609 return -1;
6610
6611 int i;
6612 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6613 for (i = 0; i <= units_log2; ++i)
6614 {
6615 unsigned HOST_WIDE_INT j, k;
6616 enum scan_store_kind kind = scan_store_kind_perm;
6617 vec_perm_builder sel (nunits, nunits, 1);
6618 sel.quick_grow (nunits);
6619 if (i == units_log2)
6620 {
6621 for (j = 0; j < nunits; ++j)
6622 sel[j] = nunits - 1;
6623 }
6624 else
6625 {
6626 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6627 sel[j] = j;
6628 for (k = 0; j < nunits; ++j, ++k)
6629 sel[j] = nunits + k;
6630 }
6631 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6632 if (!can_vec_perm_const_p (vec_mode, indices))
6633 {
6634 if (i == units_log2)
6635 return -1;
6636
6637 if (whole_vector_shift_kind == scan_store_kind_perm)
6638 {
6639 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6640 return -1;
6641 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6642 /* Whole vector shifts shift in zeros, so if init is an all-zero
6643 constant, there is no need to do anything further. */
6644 if ((TREE_CODE (init) != INTEGER_CST
6645 && TREE_CODE (init) != REAL_CST)
6646 || !initializer_zerop (init))
6647 {
6648 tree masktype = truth_type_for (vectype);
6649 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6650 return -1;
6651 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6652 }
6653 }
6654 kind = whole_vector_shift_kind;
6655 }
6656 if (use_whole_vector)
6657 {
6658 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6659 use_whole_vector->safe_grow_cleared (i, true);
6660 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6661 use_whole_vector->safe_push (kind);
6662 }
6663 }
6664
6665 return units_log2;
6666 }
6667
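/* Editor's note (illustrative, not part of the original source): for an
   assumed V4SI vectype the selectors checked above are

     step 0:  { 0, 4, 5, 6 }   // second operand shifted up by one lane,
                               // lane 0 taken from the first operand
     step 1:  { 0, 1, 4, 5 }   // shifted up by two lanes
     step 2:  { 3, 3, 3, 3 }   // broadcast of the last lane

   and where such a two-input permute is unsupported the function falls
   back to a whole-vector shift (vec_shl_optab), optionally followed by a
   VEC_COND_EXPR to re-insert a non-zero initializer.  */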
6668
6669 /* Function check_scan_store.
6670
6671 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6672
6673 static bool
6674 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6675 enum vect_def_type rhs_dt, bool slp, tree mask,
6676 vect_memory_access_type memory_access_type)
6677 {
6678 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6679 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6680 tree ref_type;
6681
6682 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6683 if (slp
6684 || mask
6685 || memory_access_type != VMAT_CONTIGUOUS
6686 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6687 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6688 || loop_vinfo == NULL
6689 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6690 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6691 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6692 || !integer_zerop (DR_INIT (dr_info->dr))
6693 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6694 || !alias_sets_conflict_p (get_alias_set (vectype),
6695 get_alias_set (TREE_TYPE (ref_type))))
6696 {
6697 if (dump_enabled_p ())
6698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6699 "unsupported OpenMP scan store.\n");
6700 return false;
6701 }
6702
6703 /* We need to pattern match code built by OpenMP lowering and simplified
6704 by subsequent optimizations into something we can handle.
6705 #pragma omp simd reduction(inscan,+:r)
6706 for (...)
6707 {
6708 r += something ();
6709 #pragma omp scan inclusive (r)
6710 use (r);
6711 }
6712 shall have body with:
6713 // Initialization for input phase, store the reduction initializer:
6714 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6715 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6716 D.2042[_21] = 0;
6717 // Actual input phase:
6718 ...
6719 r.0_5 = D.2042[_20];
6720 _6 = _4 + r.0_5;
6721 D.2042[_20] = _6;
6722 // Initialization for scan phase:
6723 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6724 _26 = D.2043[_25];
6725 _27 = D.2042[_25];
6726 _28 = _26 + _27;
6727 D.2043[_25] = _28;
6728 D.2042[_25] = _28;
6729 // Actual scan phase:
6730 ...
6731 r.1_8 = D.2042[_20];
6732 ...
6733 The "omp simd array" variable D.2042 holds the privatized copy used
6734 inside the loop and D.2043 is another one that holds copies of
6735 the current original list item. The separate GOMP_SIMD_LANE ifn
6736 kinds are there in order to allow optimizing the initializer store
6737 and combiner sequence, e.g. if it is originally some C++ish user
6738 defined reduction, while still allowing the vectorizer to pattern
6739 recognize it and turn it into the appropriate vectorized scan.
6740
6741 For exclusive scan, this is slightly different:
6742 #pragma omp simd reduction(inscan,+:r)
6743 for (...)
6744 {
6745 use (r);
6746 #pragma omp scan exclusive (r)
6747 r += something ();
6748 }
6749 shall have body with:
6750 // Initialization for input phase, store the reduction initializer:
6751 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6752 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6753 D.2042[_21] = 0;
6754 // Actual input phase:
6755 ...
6756 r.0_5 = D.2042[_20];
6757 _6 = _4 + r.0_5;
6758 D.2042[_20] = _6;
6759 // Initialization for scan phase:
6760 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6761 _26 = D.2043[_25];
6762 D.2044[_25] = _26;
6763 _27 = D.2042[_25];
6764 _28 = _26 + _27;
6765 D.2043[_25] = _28;
6766 // Actual scan phase:
6767 ...
6768 r.1_8 = D.2044[_20];
6769 ... */
6770
6771 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6772 {
6773 /* Match the D.2042[_21] = 0; store above. Just require that
6774 it is a constant or external definition store. */
6775 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6776 {
6777 fail_init:
6778 if (dump_enabled_p ())
6779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6780 "unsupported OpenMP scan initializer store.\n");
6781 return false;
6782 }
6783
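/* Remember the initializer value stored into this "omp simd array"
   variable; it is looked up again below and in vectorizable_scan_store
   when handling the combiner stores.  A second initializer store for
   the same array is not supported and fails via fail_init.  */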
6784 if (! loop_vinfo->scan_map)
6785 loop_vinfo->scan_map = new hash_map<tree, tree>;
6786 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6787 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6788 if (cached)
6789 goto fail_init;
6790 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6791
6792 /* These stores can be vectorized normally. */
6793 return true;
6794 }
6795
6796 if (rhs_dt != vect_internal_def)
6797 {
6798 fail:
6799 if (dump_enabled_p ())
6800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6801 "unsupported OpenMP scan combiner pattern.\n");
6802 return false;
6803 }
6804
6805 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6806 tree rhs = gimple_assign_rhs1 (stmt);
6807 if (TREE_CODE (rhs) != SSA_NAME)
6808 goto fail;
6809
6810 gimple *other_store_stmt = NULL;
6811 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6812 bool inscan_var_store
6813 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6814
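/* STMT_VINFO_SIMD_LANE_ACCESS_P == 4 corresponds to the exclusive scan
   IL shown above (the .GOMP_SIMD_LANE (..., 3) ifn kind), == 3 to the
   inclusive scan IL (the .GOMP_SIMD_LANE (..., 2) ifn kind).  */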
6815 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6816 {
6817 if (!inscan_var_store)
6818 {
6819 use_operand_p use_p;
6820 imm_use_iterator iter;
6821 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6822 {
6823 gimple *use_stmt = USE_STMT (use_p);
6824 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6825 continue;
6826 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6827 || !is_gimple_assign (use_stmt)
6828 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6829 || other_store_stmt
6830 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6831 goto fail;
6832 other_store_stmt = use_stmt;
6833 }
6834 if (other_store_stmt == NULL)
6835 goto fail;
6836 rhs = gimple_assign_lhs (other_store_stmt);
6837 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6838 goto fail;
6839 }
6840 }
6841 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6842 {
6843 use_operand_p use_p;
6844 imm_use_iterator iter;
6845 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6846 {
6847 gimple *use_stmt = USE_STMT (use_p);
6848 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6849 continue;
6850 if (other_store_stmt)
6851 goto fail;
6852 other_store_stmt = use_stmt;
6853 }
6854 }
6855 else
6856 goto fail;
6857
6858 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6859 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6860 || !is_gimple_assign (def_stmt)
6861 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6862 goto fail;
6863
6864 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6865 /* For pointer addition, we should use the normal plus for the vector
6866 operation. */
6867 switch (code)
6868 {
6869 case POINTER_PLUS_EXPR:
6870 code = PLUS_EXPR;
6871 break;
6872 case MULT_HIGHPART_EXPR:
6873 goto fail;
6874 default:
6875 break;
6876 }
6877 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6878 goto fail;
6879
6880 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6881 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6882 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6883 goto fail;
6884
6885 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6886 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6887 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6888 || !gimple_assign_load_p (load1_stmt)
6889 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6890 || !gimple_assign_load_p (load2_stmt))
6891 goto fail;
6892
6893 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6894 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6895 if (load1_stmt_info == NULL
6896 || load2_stmt_info == NULL
6897 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6898 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6899 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6900 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6901 goto fail;
6902
6903 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6904 {
6905 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6906 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6907 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6908 goto fail;
6909 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6910 tree lrhs;
6911 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6912 lrhs = rhs1;
6913 else
6914 lrhs = rhs2;
6915 use_operand_p use_p;
6916 imm_use_iterator iter;
6917 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6918 {
6919 gimple *use_stmt = USE_STMT (use_p);
6920 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6921 continue;
6922 if (other_store_stmt)
6923 goto fail;
6924 other_store_stmt = use_stmt;
6925 }
6926 }
6927
6928 if (other_store_stmt == NULL)
6929 goto fail;
6930 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6931 || !gimple_store_p (other_store_stmt))
6932 goto fail;
6933
6934 stmt_vec_info other_store_stmt_info
6935 = loop_vinfo->lookup_stmt (other_store_stmt);
6936 if (other_store_stmt_info == NULL
6937 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6938 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6939 goto fail;
6940
6941 gimple *stmt1 = stmt;
6942 gimple *stmt2 = other_store_stmt;
6943 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6944 std::swap (stmt1, stmt2);
6945 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6946 gimple_assign_rhs1 (load2_stmt)))
6947 {
6948 std::swap (rhs1, rhs2);
6949 std::swap (load1_stmt, load2_stmt);
6950 std::swap (load1_stmt_info, load2_stmt_info);
6951 }
6952 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6953 gimple_assign_rhs1 (load1_stmt)))
6954 goto fail;
6955
6956 tree var3 = NULL_TREE;
6957 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6958 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6959 gimple_assign_rhs1 (load2_stmt)))
6960 goto fail;
6961 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6962 {
6963 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6964 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6965 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6966 goto fail;
6967 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6968 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6969 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6970 || lookup_attribute ("omp simd inscan exclusive",
6971 DECL_ATTRIBUTES (var3)))
6972 goto fail;
6973 }
6974
6975 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6976 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6977 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6978 goto fail;
6979
6980 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6981 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6982 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6983 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6984 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6985 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6986 goto fail;
6987
6988 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6989 std::swap (var1, var2);
6990
6991 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6992 {
6993 if (!lookup_attribute ("omp simd inscan exclusive",
6994 DECL_ATTRIBUTES (var1)))
6995 goto fail;
6996 var1 = var3;
6997 }
6998
6999 if (loop_vinfo->scan_map == NULL)
7000 goto fail;
7001 tree *init = loop_vinfo->scan_map->get (var1);
7002 if (init == NULL)
7003 goto fail;
7004
7005 /* The IL is as expected, now check if we can actually vectorize it.
7006 Inclusive scan:
7007 _26 = D.2043[_25];
7008 _27 = D.2042[_25];
7009 _28 = _26 + _27;
7010 D.2043[_25] = _28;
7011 D.2042[_25] = _28;
7012 should be vectorized as (where _40 is the vectorized rhs
7013 from the D.2042[_21] = 0; store):
7014 _30 = MEM <vector(8) int> [(int *)&D.2043];
7015 _31 = MEM <vector(8) int> [(int *)&D.2042];
7016 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7017 _33 = _31 + _32;
7018 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7019 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7020 _35 = _33 + _34;
7021 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7022 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7023 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7024 _37 = _35 + _36;
7025 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7026 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7027 _38 = _30 + _37;
7028 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7029 MEM <vector(8) int> [(int *)&D.2043] = _39;
7030 MEM <vector(8) int> [(int *)&D.2042] = _38;
7031 Exclusive scan:
7032 _26 = D.2043[_25];
7033 D.2044[_25] = _26;
7034 _27 = D.2042[_25];
7035 _28 = _26 + _27;
7036 D.2043[_25] = _28;
7037 should be vectorized as (where _40 is the vectorized rhs
7038 from the D.2042[_21] = 0; store):
7039 _30 = MEM <vector(8) int> [(int *)&D.2043];
7040 _31 = MEM <vector(8) int> [(int *)&D.2042];
7041 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7042 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7043 _34 = _32 + _33;
7044 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7045 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7046 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7047 _36 = _34 + _35;
7048 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7049 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7050 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7051 _38 = _36 + _37;
7052 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7053 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7054 _39 = _30 + _38;
7055 _50 = _31 + _39;
7056 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7057 MEM <vector(8) int> [(int *)&D.2044] = _39;
7058 MEM <vector(8) int> [(int *)&D.2042] = _51; */
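/* A minimal scalar sketch of the log2-step prefix sum that the
   VEC_PERM_EXPR plus addition sequence above implements; the fixed
   width of 8, the name prefix_sum_8 and the use of 0 as the shifted-in
   value (the initializer of a + reduction) are illustrative assumptions:

     void
     prefix_sum_8 (int v[8])
     {
       for (int i = 0; i < 3; i++)   // log2 (8) steps
         {
           int s[8];
           // Shift v up by 1 << i lanes, filling the low lanes with 0
           // (the vectorized initializer _40 in the IL above).
           for (int j = 0; j < 8; j++)
             s[j] = j < (1 << i) ? 0 : v[j - (1 << i)];
           for (int j = 0; j < 8; j++)
             v[j] += s[j];
         }
       // Now v[j] == original v[0] + ... + v[j] for every lane j.
     }
   */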
7059 enum machine_mode vec_mode = TYPE_MODE (vectype);
7060 optab optab = optab_for_tree_code (code, vectype, optab_default);
7061 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7062 goto fail;
7063
7064 int units_log2 = scan_store_can_perm_p (vectype, *init);
7065 if (units_log2 == -1)
7066 goto fail;
7067
7068 return true;
7069 }
7070
7071
7072 /* Function vectorizable_scan_store.
7073
7074 Helper of vectorizable_store; arguments are the same as for that function.
7075 Handle only the transformation, checking is done in check_scan_store. */
7076
7077 static bool
7078 vectorizable_scan_store (vec_info *vinfo,
7079 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7080 gimple **vec_stmt, int ncopies)
7081 {
7082 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7083 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7084 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7085 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7086
7087 if (dump_enabled_p ())
7088 dump_printf_loc (MSG_NOTE, vect_location,
7089 "transform scan store. ncopies = %d\n", ncopies);
7090
7091 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7092 tree rhs = gimple_assign_rhs1 (stmt);
7093 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7094
7095 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7096 bool inscan_var_store
7097 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7098
7099 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7100 {
7101 use_operand_p use_p;
7102 imm_use_iterator iter;
7103 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7104 {
7105 gimple *use_stmt = USE_STMT (use_p);
7106 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7107 continue;
7108 rhs = gimple_assign_lhs (use_stmt);
7109 break;
7110 }
7111 }
7112
7113 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7114 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7115 if (code == POINTER_PLUS_EXPR)
7116 code = PLUS_EXPR;
7117 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7118 && commutative_tree_code (code));
7119 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7120 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7121 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7122 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7123 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7124 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7125 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7126 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7127 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7128 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7129 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7130
7131 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7132 {
7133 std::swap (rhs1, rhs2);
7134 std::swap (var1, var2);
7135 std::swap (load1_dr_info, load2_dr_info);
7136 }
7137
7138 tree *init = loop_vinfo->scan_map->get (var1);
7139 gcc_assert (init);
7140
7141 unsigned HOST_WIDE_INT nunits;
7142 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7143 gcc_unreachable ();
7144 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7145 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7146 gcc_assert (units_log2 > 0);
7147 auto_vec<tree, 16> perms;
7148 perms.quick_grow (units_log2 + 1);
7149 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
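/* Build the units_log2 + 1 permutation selectors used below: for
   I < units_log2, selector I keeps the low 1 << I lanes of the first
   input (the initializer or zero vector) and shifts the second input up
   by 1 << I lanes; the final selector broadcasts the last lane.  */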
7150 for (int i = 0; i <= units_log2; ++i)
7151 {
7152 unsigned HOST_WIDE_INT j, k;
7153 vec_perm_builder sel (nunits, nunits, 1);
7154 sel.quick_grow (nunits);
7155 if (i == units_log2)
7156 for (j = 0; j < nunits; ++j)
7157 sel[j] = nunits - 1;
7158 else
7159 {
7160 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7161 sel[j] = j;
7162 for (k = 0; j < nunits; ++j, ++k)
7163 sel[j] = nunits + k;
7164 }
7165 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7166 if (!use_whole_vector.is_empty ()
7167 && use_whole_vector[i] != scan_store_kind_perm)
7168 {
7169 if (zero_vec == NULL_TREE)
7170 zero_vec = build_zero_cst (vectype);
7171 if (masktype == NULL_TREE
7172 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7173 masktype = truth_type_for (vectype);
7174 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7175 }
7176 else
7177 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7178 }
7179
7180 tree vec_oprnd1 = NULL_TREE;
7181 tree vec_oprnd2 = NULL_TREE;
7182 tree vec_oprnd3 = NULL_TREE;
7183 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7184 tree dataref_offset = build_int_cst (ref_type, 0);
7185 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7186 vectype, VMAT_CONTIGUOUS);
7187 tree ldataref_ptr = NULL_TREE;
7188 tree orig = NULL_TREE;
7189 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7190 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7191 auto_vec<tree> vec_oprnds1;
7192 auto_vec<tree> vec_oprnds2;
7193 auto_vec<tree> vec_oprnds3;
7194 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7195 *init, &vec_oprnds1,
7196 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7197 rhs2, &vec_oprnds3);
7198 for (int j = 0; j < ncopies; j++)
7199 {
7200 vec_oprnd1 = vec_oprnds1[j];
7201 if (ldataref_ptr == NULL)
7202 vec_oprnd2 = vec_oprnds2[j];
7203 vec_oprnd3 = vec_oprnds3[j];
7204 if (j == 0)
7205 orig = vec_oprnd3;
7206 else if (!inscan_var_store)
7207 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7208
7209 if (ldataref_ptr)
7210 {
7211 vec_oprnd2 = make_ssa_name (vectype);
7212 tree data_ref = fold_build2 (MEM_REF, vectype,
7213 unshare_expr (ldataref_ptr),
7214 dataref_offset);
7215 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7216 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7217 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7218 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7219 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7220 }
7221
7222 tree v = vec_oprnd2;
7223 for (int i = 0; i < units_log2; ++i)
7224 {
7225 tree new_temp = make_ssa_name (vectype);
7226 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7227 (zero_vec
7228 && (use_whole_vector[i]
7229 != scan_store_kind_perm))
7230 ? zero_vec : vec_oprnd1, v,
7231 perms[i]);
7232 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7233 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7234 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7235
7236 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7237 {
7238 /* The whole-vector shift shifted in zero bits, but if *init
7239 is not initializer_zerop, we need to replace those elements
7240 with elements from vec_oprnd1. */
7241 tree_vector_builder vb (masktype, nunits, 1);
7242 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7243 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7244 ? boolean_false_node : boolean_true_node);
7245
7246 tree new_temp2 = make_ssa_name (vectype);
7247 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7248 new_temp, vec_oprnd1);
7249 vect_finish_stmt_generation (vinfo, stmt_info,
7250 g, gsi);
7251 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7252 new_temp = new_temp2;
7253 }
7254
7255 /* For exclusive scan, perform the perms[i] permutation once
7256 more. */
7257 if (i == 0
7258 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7259 && v == vec_oprnd2)
7260 {
7261 v = new_temp;
7262 --i;
7263 continue;
7264 }
7265
7266 tree new_temp2 = make_ssa_name (vectype);
7267 g = gimple_build_assign (new_temp2, code, v, new_temp);
7268 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7269 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7270
7271 v = new_temp2;
7272 }
7273
7274 tree new_temp = make_ssa_name (vectype);
7275 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7276 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7277 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7278
7279 tree last_perm_arg = new_temp;
7280 /* For exclusive scan, new_temp computed above is the exclusive scan
7281 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7282 of the last element into orig. */
7283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7284 {
7285 last_perm_arg = make_ssa_name (vectype);
7286 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7287 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7288 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7289 }
7290
7291 orig = make_ssa_name (vectype);
7292 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7293 last_perm_arg, perms[units_log2]);
7294 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7295 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7296
7297 if (!inscan_var_store)
7298 {
7299 tree data_ref = fold_build2 (MEM_REF, vectype,
7300 unshare_expr (dataref_ptr),
7301 dataref_offset);
7302 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7303 g = gimple_build_assign (data_ref, new_temp);
7304 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7305 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7306 }
7307 }
7308
7309 if (inscan_var_store)
7310 for (int j = 0; j < ncopies; j++)
7311 {
7312 if (j != 0)
7313 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7314
7315 tree data_ref = fold_build2 (MEM_REF, vectype,
7316 unshare_expr (dataref_ptr),
7317 dataref_offset);
7318 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7319 gimple *g = gimple_build_assign (data_ref, orig);
7320 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7321 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7322 }
7323 return true;
7324 }
7325
7326
7327 /* Function vectorizable_store.
7328
7329 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7330 that can be vectorized.
7331 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7332 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7333 Return true if STMT_INFO is vectorizable in this way. */
7334
7335 static bool
7336 vectorizable_store (vec_info *vinfo,
7337 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7338 gimple **vec_stmt, slp_tree slp_node,
7339 stmt_vector_for_cost *cost_vec)
7340 {
7341 tree data_ref;
7342 tree op;
7343 tree vec_oprnd = NULL_TREE;
7344 tree elem_type;
7345 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7346 class loop *loop = NULL;
7347 machine_mode vec_mode;
7348 tree dummy;
7349 enum vect_def_type rhs_dt = vect_unknown_def_type;
7350 enum vect_def_type mask_dt = vect_unknown_def_type;
7351 tree dataref_ptr = NULL_TREE;
7352 tree dataref_offset = NULL_TREE;
7353 gimple *ptr_incr = NULL;
7354 int ncopies;
7355 int j;
7356 stmt_vec_info first_stmt_info;
7357 bool grouped_store;
7358 unsigned int group_size, i;
7359 vec<tree> oprnds = vNULL;
7360 vec<tree> result_chain = vNULL;
7361 vec<tree> vec_oprnds = vNULL;
7362 bool slp = (slp_node != NULL);
7363 unsigned int vec_num;
7364 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7365 tree aggr_type;
7366 gather_scatter_info gs_info;
7367 poly_uint64 vf;
7368 vec_load_store_type vls_type;
7369 tree ref_type;
7370
7371 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7372 return false;
7373
7374 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7375 && ! vec_stmt)
7376 return false;
7377
7378 /* Is vectorizable store? */
7379
7380 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7381 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7382 {
7383 tree scalar_dest = gimple_assign_lhs (assign);
7384 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7385 && is_pattern_stmt_p (stmt_info))
7386 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7387 if (TREE_CODE (scalar_dest) != ARRAY_REF
7388 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7389 && TREE_CODE (scalar_dest) != INDIRECT_REF
7390 && TREE_CODE (scalar_dest) != COMPONENT_REF
7391 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7392 && TREE_CODE (scalar_dest) != REALPART_EXPR
7393 && TREE_CODE (scalar_dest) != MEM_REF)
7394 return false;
7395 }
7396 else
7397 {
7398 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7399 if (!call || !gimple_call_internal_p (call))
7400 return false;
7401
7402 internal_fn ifn = gimple_call_internal_fn (call);
7403 if (!internal_store_fn_p (ifn))
7404 return false;
7405
7406 if (slp_node != NULL)
7407 {
7408 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7410 "SLP of masked stores not supported.\n");
7411 return false;
7412 }
7413
7414 int mask_index = internal_fn_mask_index (ifn);
7415 if (mask_index >= 0
7416 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7417 &mask, NULL, &mask_dt, &mask_vectype))
7418 return false;
7419 }
7420
7421 op = vect_get_store_rhs (stmt_info);
7422
7423 /* Cannot have hybrid store SLP -- that would mean storing to the
7424 same location twice. */
7425 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7426
7427 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7428 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7429
7430 if (loop_vinfo)
7431 {
7432 loop = LOOP_VINFO_LOOP (loop_vinfo);
7433 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7434 }
7435 else
7436 vf = 1;
7437
7438 /* Multiple types in SLP are handled by creating the appropriate number of
7439 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7440 case of SLP. */
7441 if (slp)
7442 ncopies = 1;
7443 else
7444 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7445
7446 gcc_assert (ncopies >= 1);
7447
7448 /* FORNOW. This restriction should be relaxed. */
7449 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7450 {
7451 if (dump_enabled_p ())
7452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7453 "multiple types in nested loop.\n");
7454 return false;
7455 }
7456
7457 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7458 op, &rhs_dt, &rhs_vectype, &vls_type))
7459 return false;
7460
7461 elem_type = TREE_TYPE (vectype);
7462 vec_mode = TYPE_MODE (vectype);
7463
7464 if (!STMT_VINFO_DATA_REF (stmt_info))
7465 return false;
7466
7467 vect_memory_access_type memory_access_type;
7468 enum dr_alignment_support alignment_support_scheme;
7469 int misalignment;
7470 poly_int64 poffset;
7471 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7472 ncopies, &memory_access_type, &poffset,
7473 &alignment_support_scheme, &misalignment, &gs_info))
7474 return false;
7475
7476 if (mask)
7477 {
7478 if (memory_access_type == VMAT_CONTIGUOUS)
7479 {
7480 if (!VECTOR_MODE_P (vec_mode)
7481 || !can_vec_mask_load_store_p (vec_mode,
7482 TYPE_MODE (mask_vectype), false))
7483 return false;
7484 }
7485 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7486 && (memory_access_type != VMAT_GATHER_SCATTER
7487 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7488 {
7489 if (dump_enabled_p ())
7490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7491 "unsupported access type for masked store.\n");
7492 return false;
7493 }
7494 }
7495 else
7496 {
7497 /* FORNOW. In some cases can vectorize even if data-type not supported
7498 (e.g. array initialization with 0). */
7499 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7500 return false;
7501 }
7502
7503 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7504 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7505 && memory_access_type != VMAT_GATHER_SCATTER
7506 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7507 if (grouped_store)
7508 {
7509 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7510 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7511 group_size = DR_GROUP_SIZE (first_stmt_info);
7512 }
7513 else
7514 {
7515 first_stmt_info = stmt_info;
7516 first_dr_info = dr_info;
7517 group_size = vec_num = 1;
7518 }
7519
7520 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7521 {
7522 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7523 memory_access_type))
7524 return false;
7525 }
7526
7527 if (!vec_stmt) /* transformation not required. */
7528 {
7529 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7530
7531 if (loop_vinfo
7532 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7533 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7534 group_size, memory_access_type,
7535 ncopies, &gs_info, mask);
7536
7537 if (slp_node
7538 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7539 vectype))
7540 {
7541 if (dump_enabled_p ())
7542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543 "incompatible vector types for invariants\n");
7544 return false;
7545 }
7546
7547 if (dump_enabled_p ()
7548 && memory_access_type != VMAT_ELEMENTWISE
7549 && memory_access_type != VMAT_GATHER_SCATTER
7550 && alignment_support_scheme != dr_aligned)
7551 dump_printf_loc (MSG_NOTE, vect_location,
7552 "Vectorizing an unaligned access.\n");
7553
7554 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7555 vect_model_store_cost (vinfo, stmt_info, ncopies,
7556 memory_access_type, alignment_support_scheme,
7557 misalignment, vls_type, slp_node, cost_vec);
7558 return true;
7559 }
7560 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7561
7562 /* Transform. */
7563
7564 ensure_base_align (dr_info);
7565
7566 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7567 {
7568 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7569 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7570 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7571 tree ptr, var, scale, vec_mask;
7572 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7573 tree mask_halfvectype = mask_vectype;
7574 edge pe = loop_preheader_edge (loop);
7575 gimple_seq seq;
7576 basic_block new_bb;
7577 enum { NARROW, NONE, WIDEN } modifier;
7578 poly_uint64 scatter_off_nunits
7579 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7580
7581 if (known_eq (nunits, scatter_off_nunits))
7582 modifier = NONE;
7583 else if (known_eq (nunits * 2, scatter_off_nunits))
7584 {
7585 modifier = WIDEN;
7586
7587 /* Currently gathers and scatters are only supported for
7588 fixed-length vectors. */
7589 unsigned int count = scatter_off_nunits.to_constant ();
7590 vec_perm_builder sel (count, count, 1);
7591 for (i = 0; i < (unsigned int) count; ++i)
7592 sel.quick_push (i | (count / 2));
7593
7594 vec_perm_indices indices (sel, 1, count);
7595 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7596 indices);
7597 gcc_assert (perm_mask != NULL_TREE);
7598 }
7599 else if (known_eq (nunits, scatter_off_nunits * 2))
7600 {
7601 modifier = NARROW;
7602
7603 /* Currently gathers and scatters are only supported for
7604 fixed-length vectors. */
7605 unsigned int count = nunits.to_constant ();
7606 vec_perm_builder sel (count, count, 1);
7607 for (i = 0; i < (unsigned int) count; ++i)
7608 sel.quick_push (i | (count / 2));
7609
7610 vec_perm_indices indices (sel, 2, count);
7611 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7612 gcc_assert (perm_mask != NULL_TREE);
7613 ncopies *= 2;
7614
7615 if (mask)
7616 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7617 }
7618 else
7619 gcc_unreachable ();
7620
7621 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7622 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7623 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7624 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7625 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7626 scaletype = TREE_VALUE (arglist);
7627
7628 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7629 && TREE_CODE (rettype) == VOID_TYPE);
7630
7631 ptr = fold_convert (ptrtype, gs_info.base);
7632 if (!is_gimple_min_invariant (ptr))
7633 {
7634 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7635 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7636 gcc_assert (!new_bb);
7637 }
7638
7639 if (mask == NULL_TREE)
7640 {
7641 mask_arg = build_int_cst (masktype, -1);
7642 mask_arg = vect_init_vector (vinfo, stmt_info,
7643 mask_arg, masktype, NULL);
7644 }
7645
7646 scale = build_int_cst (scaletype, gs_info.scale);
7647
7648 auto_vec<tree> vec_oprnds0;
7649 auto_vec<tree> vec_oprnds1;
7650 auto_vec<tree> vec_masks;
7651 if (mask)
7652 {
7653 tree mask_vectype = truth_type_for (vectype);
7654 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7655 modifier == NARROW
7656 ? ncopies / 2 : ncopies,
7657 mask, &vec_masks, mask_vectype);
7658 }
7659 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7660 modifier == WIDEN
7661 ? ncopies / 2 : ncopies,
7662 gs_info.offset, &vec_oprnds0);
7663 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7664 modifier == NARROW
7665 ? ncopies / 2 : ncopies,
7666 op, &vec_oprnds1);
7667 for (j = 0; j < ncopies; ++j)
7668 {
7669 if (modifier == WIDEN)
7670 {
7671 if (j & 1)
7672 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7673 perm_mask, stmt_info, gsi);
7674 else
7675 op = vec_oprnd0 = vec_oprnds0[j / 2];
7676 src = vec_oprnd1 = vec_oprnds1[j];
7677 if (mask)
7678 mask_op = vec_mask = vec_masks[j];
7679 }
7680 else if (modifier == NARROW)
7681 {
7682 if (j & 1)
7683 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7684 perm_mask, stmt_info, gsi);
7685 else
7686 src = vec_oprnd1 = vec_oprnds1[j / 2];
7687 op = vec_oprnd0 = vec_oprnds0[j];
7688 if (mask)
7689 mask_op = vec_mask = vec_masks[j / 2];
7690 }
7691 else
7692 {
7693 op = vec_oprnd0 = vec_oprnds0[j];
7694 src = vec_oprnd1 = vec_oprnds1[j];
7695 if (mask)
7696 mask_op = vec_mask = vec_masks[j];
7697 }
7698
7699 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7700 {
7701 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7702 TYPE_VECTOR_SUBPARTS (srctype)));
7703 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7704 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7705 gassign *new_stmt
7706 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7707 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7708 src = var;
7709 }
7710
7711 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7712 {
7713 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7714 TYPE_VECTOR_SUBPARTS (idxtype)));
7715 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7716 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7717 gassign *new_stmt
7718 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7719 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7720 op = var;
7721 }
7722
7723 if (mask)
7724 {
7725 tree utype;
7726 mask_arg = mask_op;
7727 if (modifier == NARROW)
7728 {
7729 var = vect_get_new_ssa_name (mask_halfvectype,
7730 vect_simple_var);
7731 gassign *new_stmt
7732 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7733 : VEC_UNPACK_LO_EXPR,
7734 mask_op);
7735 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7736 mask_arg = var;
7737 }
7738 tree optype = TREE_TYPE (mask_arg);
7739 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7740 utype = masktype;
7741 else
7742 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7743 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7744 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7745 gassign *new_stmt
7746 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7747 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7748 mask_arg = var;
7749 if (!useless_type_conversion_p (masktype, utype))
7750 {
7751 gcc_assert (TYPE_PRECISION (utype)
7752 <= TYPE_PRECISION (masktype));
7753 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7754 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7755 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7756 mask_arg = var;
7757 }
7758 }
7759
7760 gcall *new_stmt
7761 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7762 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7763
7764 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7765 }
7766 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7767 return true;
7768 }
7769 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7770 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7771
7772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7773 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7774
7775 if (grouped_store)
7776 {
7777 /* FORNOW */
7778 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7779
7780 /* We vectorize all the stmts of the interleaving group when we
7781 reach the last stmt in the group. */
7782 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7783 < DR_GROUP_SIZE (first_stmt_info)
7784 && !slp)
7785 {
7786 *vec_stmt = NULL;
7787 return true;
7788 }
7789
7790 if (slp)
7791 {
7792 grouped_store = false;
7793 /* VEC_NUM is the number of vect stmts to be created for this
7794 group. */
7795 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7796 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7797 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7798 == first_stmt_info);
7799 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7800 op = vect_get_store_rhs (first_stmt_info);
7801 }
7802 else
7803 /* VEC_NUM is the number of vect stmts to be created for this
7804 group. */
7805 vec_num = group_size;
7806
7807 ref_type = get_group_alias_ptr_type (first_stmt_info);
7808 }
7809 else
7810 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7811
7812 if (dump_enabled_p ())
7813 dump_printf_loc (MSG_NOTE, vect_location,
7814 "transform store. ncopies = %d\n", ncopies);
7815
7816 if (memory_access_type == VMAT_ELEMENTWISE
7817 || memory_access_type == VMAT_STRIDED_SLP)
7818 {
7819 gimple_stmt_iterator incr_gsi;
7820 bool insert_after;
7821 gimple *incr;
7822 tree offvar;
7823 tree ivstep;
7824 tree running_off;
7825 tree stride_base, stride_step, alias_off;
7826 tree vec_oprnd;
7827 tree dr_offset;
7828 unsigned int g;
7829 /* Checked by get_load_store_type. */
7830 unsigned int const_nunits = nunits.to_constant ();
7831
7832 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7833 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7834
7835 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7836 stride_base
7837 = fold_build_pointer_plus
7838 (DR_BASE_ADDRESS (first_dr_info->dr),
7839 size_binop (PLUS_EXPR,
7840 convert_to_ptrofftype (dr_offset),
7841 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7842 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7843
7844 /* For a store with loop-invariant (but other than power-of-2)
7845 stride (i.e. not a grouped access) like so:
7846
7847 for (i = 0; i < n; i += stride)
7848 array[i] = ...;
7849
7850 we generate a new induction variable and new stores from
7851 the components of the (vectorized) rhs:
7852
7853 for (j = 0; ; j += VF*stride)
7854 vectemp = ...;
7855 tmp1 = vectemp[0];
7856 array[j] = tmp1;
7857 tmp2 = vectemp[1];
7858 array[j + stride] = tmp2;
7859 ...
7860 */
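/* A concrete instance of the scheme above, with illustrative values
   VF == 4 and stride == 3 (so the j loop advances by 12):

     for (j = 0; ; j += 12)
       vectemp = ...;
       array[j] = vectemp[0];
       array[j + 3] = vectemp[1];
       array[j + 6] = vectemp[2];
       array[j + 9] = vectemp[3];
   */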
7861
7862 unsigned nstores = const_nunits;
7863 unsigned lnel = 1;
7864 tree ltype = elem_type;
7865 tree lvectype = vectype;
7866 if (slp)
7867 {
7868 if (group_size < const_nunits
7869 && const_nunits % group_size == 0)
7870 {
7871 nstores = const_nunits / group_size;
7872 lnel = group_size;
7873 ltype = build_vector_type (elem_type, group_size);
7874 lvectype = vectype;
7875
7876 /* First check if vec_extract optab doesn't support extraction
7877 of vector elts directly. */
7878 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7879 machine_mode vmode;
7880 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7881 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7882 group_size).exists (&vmode)
7883 || (convert_optab_handler (vec_extract_optab,
7884 TYPE_MODE (vectype), vmode)
7885 == CODE_FOR_nothing))
7886 {
7887 /* Try to avoid emitting an extract of vector elements
7888 by performing the extracts using an integer type of the
7889 same size, extracting from a vector of those and then
7890 re-interpreting it as the original vector type if
7891 supported. */
7892 unsigned lsize
7893 = group_size * GET_MODE_BITSIZE (elmode);
7894 unsigned int lnunits = const_nunits / group_size;
7895 /* If we can't construct such a vector fall back to
7896 element extracts from the original vector type and
7897 element size stores. */
7898 if (int_mode_for_size (lsize, 0).exists (&elmode)
7899 && VECTOR_MODE_P (TYPE_MODE (vectype))
7900 && related_vector_mode (TYPE_MODE (vectype), elmode,
7901 lnunits).exists (&vmode)
7902 && (convert_optab_handler (vec_extract_optab,
7903 vmode, elmode)
7904 != CODE_FOR_nothing))
7905 {
7906 nstores = lnunits;
7907 lnel = group_size;
7908 ltype = build_nonstandard_integer_type (lsize, 1);
7909 lvectype = build_vector_type (ltype, nstores);
7910 }
7911 /* Else fall back to vector extraction anyway.
7912 Fewer stores are more important than avoiding spilling
7913 of the vector we extract from. Compared to the
7914 construction case in vectorizable_load no store-forwarding
7915 issue exists here for reasonable archs. */
7916 }
7917 }
7918 else if (group_size >= const_nunits
7919 && group_size % const_nunits == 0)
7920 {
7921 nstores = 1;
7922 lnel = const_nunits;
7923 ltype = vectype;
7924 lvectype = vectype;
7925 }
7926 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7927 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7928 }
7929
7930 ivstep = stride_step;
7931 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7932 build_int_cst (TREE_TYPE (ivstep), vf));
7933
7934 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7935
7936 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7937 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7938 create_iv (stride_base, ivstep, NULL,
7939 loop, &incr_gsi, insert_after,
7940 &offvar, NULL);
7941 incr = gsi_stmt (incr_gsi);
7942
7943 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7944
7945 alias_off = build_int_cst (ref_type, 0);
7946 stmt_vec_info next_stmt_info = first_stmt_info;
7947 for (g = 0; g < group_size; g++)
7948 {
7949 running_off = offvar;
7950 if (g)
7951 {
7952 tree size = TYPE_SIZE_UNIT (ltype);
7953 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7954 size);
7955 tree newoff = copy_ssa_name (running_off, NULL);
7956 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7957 running_off, pos);
7958 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7959 running_off = newoff;
7960 }
7961 if (!slp)
7962 op = vect_get_store_rhs (next_stmt_info);
7963 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7964 op, &vec_oprnds);
7965 unsigned int group_el = 0;
7966 unsigned HOST_WIDE_INT
7967 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7968 for (j = 0; j < ncopies; j++)
7969 {
7970 vec_oprnd = vec_oprnds[j];
7971 /* Pun the vector to extract from if necessary. */
7972 if (lvectype != vectype)
7973 {
7974 tree tem = make_ssa_name (lvectype);
7975 gimple *pun
7976 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7977 lvectype, vec_oprnd));
7978 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7979 vec_oprnd = tem;
7980 }
7981 for (i = 0; i < nstores; i++)
7982 {
7983 tree newref, newoff;
7984 gimple *incr, *assign;
7985 tree size = TYPE_SIZE (ltype);
7986 /* Extract the i'th component. */
7987 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7988 bitsize_int (i), size);
7989 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7990 size, pos);
7991
7992 elem = force_gimple_operand_gsi (gsi, elem, true,
7993 NULL_TREE, true,
7994 GSI_SAME_STMT);
7995
7996 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7997 group_el * elsz);
7998 newref = build2 (MEM_REF, ltype,
7999 running_off, this_off);
8000 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8001
8002 /* And store it to *running_off. */
8003 assign = gimple_build_assign (newref, elem);
8004 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8005
8006 group_el += lnel;
8007 if (! slp
8008 || group_el == group_size)
8009 {
8010 newoff = copy_ssa_name (running_off, NULL);
8011 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8012 running_off, stride_step);
8013 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8014
8015 running_off = newoff;
8016 group_el = 0;
8017 }
8018 if (g == group_size - 1
8019 && !slp)
8020 {
8021 if (j == 0 && i == 0)
8022 *vec_stmt = assign;
8023 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8024 }
8025 }
8026 }
8027 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8028 vec_oprnds.release ();
8029 if (slp)
8030 break;
8031 }
8032
8033 return true;
8034 }
8035
8036 auto_vec<tree> dr_chain (group_size);
8037 oprnds.create (group_size);
8038
8039 gcc_assert (alignment_support_scheme);
8040 vec_loop_masks *loop_masks
8041 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8042 ? &LOOP_VINFO_MASKS (loop_vinfo)
8043 : NULL);
8044 vec_loop_lens *loop_lens
8045 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8046 ? &LOOP_VINFO_LENS (loop_vinfo)
8047 : NULL);
8048
8049 /* Shouldn't go with length-based approach if fully masked. */
8050 gcc_assert (!loop_lens || !loop_masks);
8051
8052 /* Targets with store-lane instructions must not require explicit
8053 realignment. vect_supportable_dr_alignment always returns either
8054 dr_aligned or dr_unaligned_supported for masked operations. */
8055 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8056 && !mask
8057 && !loop_masks)
8058 || alignment_support_scheme == dr_aligned
8059 || alignment_support_scheme == dr_unaligned_supported);
8060
8061 tree offset = NULL_TREE;
8062 if (!known_eq (poffset, 0))
8063 offset = size_int (poffset);
8064
8065 tree bump;
8066 tree vec_offset = NULL_TREE;
8067 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8068 {
8069 aggr_type = NULL_TREE;
8070 bump = NULL_TREE;
8071 }
8072 else if (memory_access_type == VMAT_GATHER_SCATTER)
8073 {
8074 aggr_type = elem_type;
8075 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8076 &bump, &vec_offset);
8077 }
8078 else
8079 {
8080 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8081 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8082 else
8083 aggr_type = vectype;
8084 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8085 memory_access_type);
8086 }
8087
8088 if (mask)
8089 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8090
8091 /* In case the vectorization factor (VF) is bigger than the number
8092 of elements that we can fit in a vectype (nunits), we have to generate
8093 more than one vector stmt, i.e. we need to "unroll" the
8094 vector stmt by a factor VF/nunits. */
8095
8096 /* In case of interleaving (non-unit grouped access):
8097
8098 S1: &base + 2 = x2
8099 S2: &base = x0
8100 S3: &base + 1 = x1
8101 S4: &base + 3 = x3
8102
8103 We create vectorized stores starting from the base address (the access of
8104 the first stmt in the chain, S2 in the above example) when the last store
8105 stmt of the chain (S4) is reached:
8106
8107 VS1: &base = vx2
8108 VS2: &base + vec_size*1 = vx0
8109 VS3: &base + vec_size*2 = vx1
8110 VS4: &base + vec_size*3 = vx3
8111
8112 Then permutation statements are generated:
8113
8114 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8115 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8116 ...
8117
8118 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8119 (the order of the data-refs in the output of vect_permute_store_chain
8120 corresponds to the order of scalar stmts in the interleaving chain - see
8121 the documentation of vect_permute_store_chain()).
8122
8123 In case of both multiple types and interleaving, above vector stores and
8124 permutation stmts are created for every copy. The result vector stmts are
8125 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8126 STMT_VINFO_RELATED_STMT for the next copies.
8127 */
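/* A typical source pattern producing such an interleaving group
   (illustrative only; type, field and variable names are assumptions):

     struct S { int x0, x1, x2, x3; };

     void
     foo (struct S *s, int n, int x0, int x1, int x2, int x3)
     {
       for (int i = 0; i < n; i++)
         {
           s[i].x2 = x2;   // S1: &base + 2
           s[i].x0 = x0;   // S2: &base
           s[i].x1 = x1;   // S3: &base + 1
           s[i].x3 = x3;   // S4: &base + 3
         }
     }
   */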
8128
8129 auto_vec<tree> vec_masks;
8130 tree vec_mask = NULL;
8131 auto_vec<tree> vec_offsets;
8132 auto_vec<vec<tree> > gvec_oprnds;
8133 gvec_oprnds.safe_grow_cleared (group_size, true);
8134 for (j = 0; j < ncopies; j++)
8135 {
8136 gimple *new_stmt;
8137 if (j == 0)
8138 {
8139 if (slp)
8140 {
8141 /* Get vectorized arguments for SLP_NODE. */
8142 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8143 op, &vec_oprnds);
8144 vec_oprnd = vec_oprnds[0];
8145 }
8146 else
8147 {
8148 /* For interleaved stores we collect vectorized defs for all the
8149 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8150 used as an input to vect_permute_store_chain().
8151
8152 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8153 and OPRNDS are of size 1. */
8154 stmt_vec_info next_stmt_info = first_stmt_info;
8155 for (i = 0; i < group_size; i++)
8156 {
8157 /* Since gaps are not supported for interleaved stores,
8158 DR_GROUP_SIZE is the exact number of stmts in the chain.
8159 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8160 there is no interleaving, DR_GROUP_SIZE is 1,
8161 and only one iteration of the loop will be executed. */
8162 op = vect_get_store_rhs (next_stmt_info);
8163 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8164 ncopies, op, &gvec_oprnds[i]);
8165 vec_oprnd = gvec_oprnds[i][0];
8166 dr_chain.quick_push (gvec_oprnds[i][0]);
8167 oprnds.quick_push (gvec_oprnds[i][0]);
8168 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8169 }
8170 if (mask)
8171 {
8172 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8173 mask, &vec_masks, mask_vectype);
8174 vec_mask = vec_masks[0];
8175 }
8176 }
8177
8178 /* We should have caught mismatched types earlier. */
8179 gcc_assert (useless_type_conversion_p (vectype,
8180 TREE_TYPE (vec_oprnd)));
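/* For the "omp simd array" case below the base is a known local array
   accessed at a constant offset, so address it directly through
   DATAREF_OFFSET instead of creating a data-ref pointer IV.  */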
8181 bool simd_lane_access_p
8182 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8183 if (simd_lane_access_p
8184 && !loop_masks
8185 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8186 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8187 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8188 && integer_zerop (DR_INIT (first_dr_info->dr))
8189 && alias_sets_conflict_p (get_alias_set (aggr_type),
8190 get_alias_set (TREE_TYPE (ref_type))))
8191 {
8192 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8193 dataref_offset = build_int_cst (ref_type, 0);
8194 }
8195 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8196 {
8197 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8198 slp_node, &gs_info, &dataref_ptr,
8199 &vec_offsets);
8200 vec_offset = vec_offsets[0];
8201 }
8202 else
8203 dataref_ptr
8204 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8205 simd_lane_access_p ? loop : NULL,
8206 offset, &dummy, gsi, &ptr_incr,
8207 simd_lane_access_p, bump);
8208 }
8209 else
8210 {
8211 /* For interleaved stores we created vectorized defs for all the
8212 defs stored in OPRNDS in the previous iteration (previous copy).
8213 DR_CHAIN is then used as an input to vect_permute_store_chain().
8214 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8215 OPRNDS are of size 1. */
8216 for (i = 0; i < group_size; i++)
8217 {
8218 vec_oprnd = gvec_oprnds[i][j];
8219 dr_chain[i] = gvec_oprnds[i][j];
8220 oprnds[i] = gvec_oprnds[i][j];
8221 }
8222 if (mask)
8223 vec_mask = vec_masks[j];
8224 if (dataref_offset)
8225 dataref_offset
8226 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8227 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8228 vec_offset = vec_offsets[j];
8229 else
8230 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8231 stmt_info, bump);
8232 }
8233
8234 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8235 {
8236 tree vec_array;
8237
8238 /* Get an array into which we can store the individual vectors. */
8239 vec_array = create_vector_array (vectype, vec_num);
8240
8241 /* Invalidate the current contents of VEC_ARRAY. This should
8242 become an RTL clobber too, which prevents the vector registers
8243 from being upward-exposed. */
8244 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8245
8246 /* Store the individual vectors into the array. */
8247 for (i = 0; i < vec_num; i++)
8248 {
8249 vec_oprnd = dr_chain[i];
8250 write_vector_array (vinfo, stmt_info,
8251 gsi, vec_oprnd, vec_array, i);
8252 }
8253
8254 tree final_mask = NULL;
8255 if (loop_masks)
8256 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8257 vectype, j);
8258 if (vec_mask)
8259 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8260 final_mask, vec_mask, gsi);
8261
8262 gcall *call;
8263 if (final_mask)
8264 {
8265 /* Emit:
8266 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8267 VEC_ARRAY). */
8268 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8269 tree alias_ptr = build_int_cst (ref_type, align);
8270 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8271 dataref_ptr, alias_ptr,
8272 final_mask, vec_array);
8273 }
8274 else
8275 {
8276 /* Emit:
8277 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8278 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8279 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8280 vec_array);
8281 gimple_call_set_lhs (call, data_ref);
8282 }
8283 gimple_call_set_nothrow (call, true);
8284 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8285 new_stmt = call;
8286
8287 /* Record that VEC_ARRAY is now dead. */
8288 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8289 }
8290 else
8291 {
8292 new_stmt = NULL;
8293 if (grouped_store)
8294 {
8295 if (j == 0)
8296 result_chain.create (group_size);
8297 /* Permute. */
8298 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8299 gsi, &result_chain);
8300 }
8301
8302 stmt_vec_info next_stmt_info = first_stmt_info;
8303 for (i = 0; i < vec_num; i++)
8304 {
8305 unsigned misalign;
8306 unsigned HOST_WIDE_INT align;
8307
8308 tree final_mask = NULL_TREE;
8309 if (loop_masks)
8310 final_mask = vect_get_loop_mask (gsi, loop_masks,
8311 vec_num * ncopies,
8312 vectype, vec_num * j + i);
8313 if (vec_mask)
8314 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8315 final_mask, vec_mask, gsi);
8316
8317 if (memory_access_type == VMAT_GATHER_SCATTER)
8318 {
8319 tree scale = size_int (gs_info.scale);
8320 gcall *call;
8321 if (final_mask)
8322 call = gimple_build_call_internal
8323 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8324 scale, vec_oprnd, final_mask);
8325 else
8326 call = gimple_build_call_internal
8327 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8328 scale, vec_oprnd);
8329 gimple_call_set_nothrow (call, true);
8330 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8331 new_stmt = call;
8332 break;
8333 }
8334
8335 if (i > 0)
8336 /* Bump the vector pointer. */
8337 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8338 gsi, stmt_info, bump);
8339
8340 if (slp)
8341 vec_oprnd = vec_oprnds[i];
8342 else if (grouped_store)
8343 /* For grouped stores vectorized defs are interleaved in
8344 vect_permute_store_chain(). */
8345 vec_oprnd = result_chain[i];
8346
8347 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8348 if (alignment_support_scheme == dr_aligned)
8349 misalign = 0;
8350 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8351 {
8352 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8353 misalign = 0;
8354 }
8355 else
8356 misalign = misalignment;
8357 if (dataref_offset == NULL_TREE
8358 && TREE_CODE (dataref_ptr) == SSA_NAME)
8359 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8360 misalign);
8361 align = least_bit_hwi (misalign | align);
8362
8363 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8364 {
8365 tree perm_mask = perm_mask_for_reverse (vectype);
8366 tree perm_dest = vect_create_destination_var
8367 (vect_get_store_rhs (stmt_info), vectype);
8368 tree new_temp = make_ssa_name (perm_dest);
8369
8370 /* Generate the permute statement. */
8371 gimple *perm_stmt
8372 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8373 vec_oprnd, perm_mask);
8374 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8375
8376 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8377 vec_oprnd = new_temp;
8378 }
8379
8380 /* Arguments are ready. Create the new vector stmt. */
8381 if (final_mask)
8382 {
8383 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8384 gcall *call
8385 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8386 dataref_ptr, ptr,
8387 final_mask, vec_oprnd);
8388 gimple_call_set_nothrow (call, true);
8389 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8390 new_stmt = call;
8391 }
8392 else if (loop_lens)
8393 {
8394 tree final_len
8395 = vect_get_loop_len (loop_vinfo, loop_lens,
8396 vec_num * ncopies, vec_num * j + i);
8397 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8398 machine_mode vmode = TYPE_MODE (vectype);
8399 opt_machine_mode new_ovmode
8400 = get_len_load_store_mode (vmode, false);
8401 machine_mode new_vmode = new_ovmode.require ();
8402 /* Need conversion if it's wrapped with VnQI. */
8403 if (vmode != new_vmode)
8404 {
8405 tree new_vtype
8406 = build_vector_type_for_mode (unsigned_intQI_type_node,
8407 new_vmode);
8408 tree var
8409 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8410 vec_oprnd
8411 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8412 gassign *new_stmt
8413 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8414 vec_oprnd);
8415 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8416 gsi);
8417 vec_oprnd = var;
8418 }
8419
8420 signed char biasval =
8421 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8422
8423 tree bias = build_int_cst (intQI_type_node, biasval);
8424 gcall *call
8425 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8426 ptr, final_len, vec_oprnd,
8427 bias);
8428 gimple_call_set_nothrow (call, true);
8429 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8430 new_stmt = call;
8431 }
8432 else
8433 {
8434 data_ref = fold_build2 (MEM_REF, vectype,
8435 dataref_ptr,
8436 dataref_offset
8437 ? dataref_offset
8438 : build_int_cst (ref_type, 0));
8439 if (alignment_support_scheme == dr_aligned)
8440 ;
8441 else
8442 TREE_TYPE (data_ref)
8443 = build_aligned_type (TREE_TYPE (data_ref),
8444 align * BITS_PER_UNIT);
8445 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8446 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8448 }
8449
8450 if (slp)
8451 continue;
8452
8453 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8454 if (!next_stmt_info)
8455 break;
8456 }
8457 }
8458 if (!slp)
8459 {
8460 if (j == 0)
8461 *vec_stmt = new_stmt;
8462 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8463 }
8464 }
8465
8466 for (i = 0; i < group_size; ++i)
8467 {
8468 vec<tree> oprndsi = gvec_oprnds[i];
8469 oprndsi.release ();
8470 }
8471 oprnds.release ();
8472 result_chain.release ();
8473 vec_oprnds.release ();
8474
8475 return true;
8476 }
8477
8478 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8479 VECTOR_CST mask. No checks are made that the target platform supports the
8480 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8481 vect_gen_perm_mask_checked. */
8482
8483 tree
8484 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8485 {
8486 tree mask_type;
8487
8488 poly_uint64 nunits = sel.length ();
8489 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8490
8491 mask_type = build_vector_type (ssizetype, nunits);
8492 return vec_perm_indices_to_tree (mask_type, sel);
8493 }
8494
8495 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8496 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8497
8498 tree
8499 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8500 {
8501 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8502 return vect_gen_perm_mask_any (vectype, sel);
8503 }
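/* For illustration only, a minimal sketch of how these helpers are typically
used (mirroring perm_mask_for_reverse earlier in this file): build the
indices with vec_perm_builder, check target support, then materialize the
VECTOR_CST mask:

poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
vec_perm_builder sel (nunits, 1, 3);
for (int i = 0; i < 3; ++i)
sel.quick_push (nunits - 1 - i);
vec_perm_indices indices (sel, 1, nunits);
if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
mask = vect_gen_perm_mask_checked (vectype, indices); */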
8504
8505 /* Given vector variables X and Y that were generated for the scalar
8506 STMT_INFO, generate instructions to permute the vector elements of X and Y
8507 using the permutation mask MASK_VEC, insert them at *GSI and return the
8508 permuted vector variable. */
8509
8510 static tree
8511 permute_vec_elements (vec_info *vinfo,
8512 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8513 gimple_stmt_iterator *gsi)
8514 {
8515 tree vectype = TREE_TYPE (x);
8516 tree perm_dest, data_ref;
8517 gimple *perm_stmt;
8518
8519 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8520 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8521 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8522 else
8523 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8524 data_ref = make_ssa_name (perm_dest);
8525
8526 /* Generate the permute statement. */
8527 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8528 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8529
8530 return data_ref;
8531 }
8532
8533 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8534 inserting them on the loop's preheader edge. Returns true if we
8535 were successful in doing so (and thus STMT_INFO can then be moved),
8536 otherwise returns false. */
8537
8538 static bool
8539 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8540 {
8541 ssa_op_iter i;
8542 tree op;
8543 bool any = false;
8544
8545 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8546 {
8547 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8548 if (!gimple_nop_p (def_stmt)
8549 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8550 {
8551 /* Make sure we don't need to recurse. While we could do
8552 so in simple cases, when there are more complex use webs
8553 we don't have an easy way to preserve stmt order to fulfil
8554 dependencies within them. */
8555 tree op2;
8556 ssa_op_iter i2;
8557 if (gimple_code (def_stmt) == GIMPLE_PHI)
8558 return false;
8559 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8560 {
8561 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8562 if (!gimple_nop_p (def_stmt2)
8563 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8564 return false;
8565 }
8566 any = true;
8567 }
8568 }
8569
8570 if (!any)
8571 return true;
8572
8573 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8574 {
8575 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8576 if (!gimple_nop_p (def_stmt)
8577 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8578 {
8579 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8580 gsi_remove (&gsi, false);
8581 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8582 }
8583 }
8584
8585 return true;
8586 }
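/* For illustration only (hypothetical GIMPLE): for an invariant load such as

loop:
a_1 = b_5 + 1;
x_2 = arr[a_1];     <-- STMT_INFO

where b_5 is defined before the loop, hoist_defs_of_uses moves
"a_1 = b_5 + 1" to the preheader edge so that the caller can then emit the
(equally invariant) load of x_2 outside the loop as well. */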
8587
8588 /* vectorizable_load.
8589
8590 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8591 that can be vectorized.
8592 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8593 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8594 Return true if STMT_INFO is vectorizable in this way. */
8595
8596 static bool
8597 vectorizable_load (vec_info *vinfo,
8598 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8599 gimple **vec_stmt, slp_tree slp_node,
8600 stmt_vector_for_cost *cost_vec)
8601 {
8602 tree scalar_dest;
8603 tree vec_dest = NULL;
8604 tree data_ref = NULL;
8605 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8606 class loop *loop = NULL;
8607 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8608 bool nested_in_vect_loop = false;
8609 tree elem_type;
8610 tree new_temp;
8611 machine_mode mode;
8612 tree dummy;
8613 tree dataref_ptr = NULL_TREE;
8614 tree dataref_offset = NULL_TREE;
8615 gimple *ptr_incr = NULL;
8616 int ncopies;
8617 int i, j;
8618 unsigned int group_size;
8619 poly_uint64 group_gap_adj;
8620 tree msq = NULL_TREE, lsq;
8621 tree realignment_token = NULL_TREE;
8622 gphi *phi = NULL;
8623 vec<tree> dr_chain = vNULL;
8624 bool grouped_load = false;
8625 stmt_vec_info first_stmt_info;
8626 stmt_vec_info first_stmt_info_for_drptr = NULL;
8627 bool compute_in_loop = false;
8628 class loop *at_loop;
8629 int vec_num;
8630 bool slp = (slp_node != NULL);
8631 bool slp_perm = false;
8632 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8633 poly_uint64 vf;
8634 tree aggr_type;
8635 gather_scatter_info gs_info;
8636 tree ref_type;
8637 enum vect_def_type mask_dt = vect_unknown_def_type;
8638
8639 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8640 return false;
8641
8642 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8643 && ! vec_stmt)
8644 return false;
8645
8646 if (!STMT_VINFO_DATA_REF (stmt_info))
8647 return false;
8648
8649 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8650 int mask_index = -1;
8651 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8652 {
8653 scalar_dest = gimple_assign_lhs (assign);
8654 if (TREE_CODE (scalar_dest) != SSA_NAME)
8655 return false;
8656
8657 tree_code code = gimple_assign_rhs_code (assign);
8658 if (code != ARRAY_REF
8659 && code != BIT_FIELD_REF
8660 && code != INDIRECT_REF
8661 && code != COMPONENT_REF
8662 && code != IMAGPART_EXPR
8663 && code != REALPART_EXPR
8664 && code != MEM_REF
8665 && TREE_CODE_CLASS (code) != tcc_declaration)
8666 return false;
8667 }
8668 else
8669 {
8670 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8671 if (!call || !gimple_call_internal_p (call))
8672 return false;
8673
8674 internal_fn ifn = gimple_call_internal_fn (call);
8675 if (!internal_load_fn_p (ifn))
8676 return false;
8677
8678 scalar_dest = gimple_call_lhs (call);
8679 if (!scalar_dest)
8680 return false;
8681
8682 mask_index = internal_fn_mask_index (ifn);
8683 /* ??? For SLP the mask operand is always last. */
8684 if (mask_index >= 0 && slp_node)
8685 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8686 if (mask_index >= 0
8687 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8688 &mask, NULL, &mask_dt, &mask_vectype))
8689 return false;
8690 }
8691
8692 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8693 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8694
8695 if (loop_vinfo)
8696 {
8697 loop = LOOP_VINFO_LOOP (loop_vinfo);
8698 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8699 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8700 }
8701 else
8702 vf = 1;
8703
8704 /* Multiple types in SLP are handled by creating the appropriate number of
8705 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8706 case of SLP. */
8707 if (slp)
8708 ncopies = 1;
8709 else
8710 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8711
8712 gcc_assert (ncopies >= 1);
8713
8714 /* FORNOW. This restriction should be relaxed. */
8715 if (nested_in_vect_loop && ncopies > 1)
8716 {
8717 if (dump_enabled_p ())
8718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8719 "multiple types in nested loop.\n");
8720 return false;
8721 }
8722
8723 /* Invalidate assumptions made by dependence analysis when vectorization
8724 on the unrolled body effectively re-orders stmts. */
8725 if (ncopies > 1
8726 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8727 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8728 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8729 {
8730 if (dump_enabled_p ())
8731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8732 "cannot perform implicit CSE when unrolling "
8733 "with negative dependence distance\n");
8734 return false;
8735 }
8736
8737 elem_type = TREE_TYPE (vectype);
8738 mode = TYPE_MODE (vectype);
8739
8740 /* FORNOW. In some cases we can vectorize even if the data type is not
8741 supported (e.g. data copies). */
8742 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8743 {
8744 if (dump_enabled_p ())
8745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8746 "Aligned load, but unsupported type.\n");
8747 return false;
8748 }
8749
8750 /* Check if the load is a part of an interleaving chain. */
8751 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8752 {
8753 grouped_load = true;
8754 /* FORNOW */
8755 gcc_assert (!nested_in_vect_loop);
8756 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8757
8758 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8759 group_size = DR_GROUP_SIZE (first_stmt_info);
8760
8761 /* Refuse non-SLP vectorization of SLP-only groups. */
8762 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8763 {
8764 if (dump_enabled_p ())
8765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8766 "cannot vectorize load in non-SLP mode.\n");
8767 return false;
8768 }
8769
8770 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8771 {
8772 slp_perm = true;
8773
8774 if (!loop_vinfo)
8775 {
8776 /* In BB vectorization we may not actually use a loaded vector
8777 accessing elements in excess of DR_GROUP_SIZE. */
8778 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8779 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8780 unsigned HOST_WIDE_INT nunits;
8781 unsigned j, k, maxk = 0;
8782 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8783 if (k > maxk)
8784 maxk = k;
8785 tree vectype = SLP_TREE_VECTYPE (slp_node);
8786 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8787 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8788 {
8789 if (dump_enabled_p ())
8790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8791 "BB vectorization with gaps at the end of "
8792 "a load is not supported\n");
8793 return false;
8794 }
8795 }
8796
8797 auto_vec<tree> tem;
8798 unsigned n_perms;
8799 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8800 true, &n_perms))
8801 {
8802 if (dump_enabled_p ())
8803 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8804 vect_location,
8805 "unsupported load permutation\n");
8806 return false;
8807 }
8808 }
8809
8810 /* Invalidate assumptions made by dependence analysis when vectorization
8811 on the unrolled body effectively re-orders stmts. */
8812 if (!PURE_SLP_STMT (stmt_info)
8813 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8814 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8815 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8816 {
8817 if (dump_enabled_p ())
8818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8819 "cannot perform implicit CSE when performing "
8820 "group loads with negative dependence distance\n");
8821 return false;
8822 }
8823 }
8824 else
8825 group_size = 1;
8826
8827 vect_memory_access_type memory_access_type;
8828 enum dr_alignment_support alignment_support_scheme;
8829 int misalignment;
8830 poly_int64 poffset;
8831 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8832 ncopies, &memory_access_type, &poffset,
8833 &alignment_support_scheme, &misalignment, &gs_info))
8834 return false;
8835
8836 if (mask)
8837 {
8838 if (memory_access_type == VMAT_CONTIGUOUS)
8839 {
8840 machine_mode vec_mode = TYPE_MODE (vectype);
8841 if (!VECTOR_MODE_P (vec_mode)
8842 || !can_vec_mask_load_store_p (vec_mode,
8843 TYPE_MODE (mask_vectype), true))
8844 return false;
8845 }
8846 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8847 && memory_access_type != VMAT_GATHER_SCATTER)
8848 {
8849 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8851 "unsupported access type for masked load.\n");
8852 return false;
8853 }
8854 else if (memory_access_type == VMAT_GATHER_SCATTER
8855 && gs_info.ifn == IFN_LAST
8856 && !gs_info.decl)
8857 {
8858 if (dump_enabled_p ())
8859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8860 "unsupported masked emulated gather.\n");
8861 return false;
8862 }
8863 }
8864
8865 if (!vec_stmt) /* transformation not required. */
8866 {
8867 if (slp_node
8868 && mask
8869 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8870 mask_vectype))
8871 {
8872 if (dump_enabled_p ())
8873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8874 "incompatible vector types for invariants\n");
8875 return false;
8876 }
8877
8878 if (!slp)
8879 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8880
8881 if (loop_vinfo
8882 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8883 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8884 group_size, memory_access_type,
8885 ncopies, &gs_info, mask);
8886
8887 if (dump_enabled_p ()
8888 && memory_access_type != VMAT_ELEMENTWISE
8889 && memory_access_type != VMAT_GATHER_SCATTER
8890 && alignment_support_scheme != dr_aligned)
8891 dump_printf_loc (MSG_NOTE, vect_location,
8892 "Vectorizing an unaligned access.\n");
8893
8894 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8895 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8896 alignment_support_scheme, misalignment,
8897 &gs_info, slp_node, cost_vec);
8898 return true;
8899 }
8900
8901 if (!slp)
8902 gcc_assert (memory_access_type
8903 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8904
8905 if (dump_enabled_p ())
8906 dump_printf_loc (MSG_NOTE, vect_location,
8907 "transform load. ncopies = %d\n", ncopies);
8908
8909 /* Transform. */
8910
8911 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8912 ensure_base_align (dr_info);
8913
8914 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8915 {
8916 vect_build_gather_load_calls (vinfo,
8917 stmt_info, gsi, vec_stmt, &gs_info, mask);
8918 return true;
8919 }
8920
8921 if (memory_access_type == VMAT_INVARIANT)
8922 {
8923 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8924 /* If we have versioned for aliasing or the loop doesn't
8925 have any data dependencies that would preclude this,
8926 then we are sure this is a loop invariant load and
8927 thus we can insert it on the preheader edge. */
8928 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8929 && !nested_in_vect_loop
8930 && hoist_defs_of_uses (stmt_info, loop));
8931 if (hoist_p)
8932 {
8933 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8934 if (dump_enabled_p ())
8935 dump_printf_loc (MSG_NOTE, vect_location,
8936 "hoisting out of the vectorized loop: %G", stmt);
8937 scalar_dest = copy_ssa_name (scalar_dest);
8938 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8939 gsi_insert_on_edge_immediate
8940 (loop_preheader_edge (loop),
8941 gimple_build_assign (scalar_dest, rhs));
8942 }
8943 /* These copies are all equivalent, but currently the representation
8944 requires a separate STMT_VINFO_VEC_STMT for each one. */
8945 gimple_stmt_iterator gsi2 = *gsi;
8946 gsi_next (&gsi2);
8947 for (j = 0; j < ncopies; j++)
8948 {
8949 if (hoist_p)
8950 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8951 vectype, NULL);
8952 else
8953 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8954 vectype, &gsi2);
8955 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8956 if (slp)
8957 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8958 else
8959 {
8960 if (j == 0)
8961 *vec_stmt = new_stmt;
8962 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8963 }
8964 }
8965 return true;
8966 }
8967
8968 if (memory_access_type == VMAT_ELEMENTWISE
8969 || memory_access_type == VMAT_STRIDED_SLP)
8970 {
8971 gimple_stmt_iterator incr_gsi;
8972 bool insert_after;
8973 tree offvar;
8974 tree ivstep;
8975 tree running_off;
8976 vec<constructor_elt, va_gc> *v = NULL;
8977 tree stride_base, stride_step, alias_off;
8978 /* Checked by get_load_store_type. */
8979 unsigned int const_nunits = nunits.to_constant ();
8980 unsigned HOST_WIDE_INT cst_offset = 0;
8981 tree dr_offset;
8982
8983 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8984 gcc_assert (!nested_in_vect_loop);
8985
8986 if (grouped_load)
8987 {
8988 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8989 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8990 }
8991 else
8992 {
8993 first_stmt_info = stmt_info;
8994 first_dr_info = dr_info;
8995 }
8996 if (slp && grouped_load)
8997 {
8998 group_size = DR_GROUP_SIZE (first_stmt_info);
8999 ref_type = get_group_alias_ptr_type (first_stmt_info);
9000 }
9001 else
9002 {
9003 if (grouped_load)
9004 cst_offset
9005 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9006 * vect_get_place_in_interleaving_chain (stmt_info,
9007 first_stmt_info));
9008 group_size = 1;
9009 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9010 }
9011
9012 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9013 stride_base
9014 = fold_build_pointer_plus
9015 (DR_BASE_ADDRESS (first_dr_info->dr),
9016 size_binop (PLUS_EXPR,
9017 convert_to_ptrofftype (dr_offset),
9018 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9019 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9020
9021 /* For a load with a loop-invariant (but non-power-of-2) stride
9022 (i.e. not a grouped access) like so:
9023
9024 for (i = 0; i < n; i += stride)
9025 ... = array[i];
9026
9027 we generate a new induction variable and new accesses to
9028 form a new vector (or vectors, depending on ncopies):
9029
9030 for (j = 0; ; j += VF*stride)
9031 tmp1 = array[j];
9032 tmp2 = array[j + stride];
9033 ...
9034 vectemp = {tmp1, tmp2, ...}
9035 */
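/* As a hypothetical concrete instance (illustration only): with a
four-element vectype and stride 3, each vectorized iteration performs

tmp1 = array[j];      tmp2 = array[j + 3];
tmp3 = array[j + 6];  tmp4 = array[j + 9];
vectemp = {tmp1, tmp2, tmp3, tmp4};

and j advances by VF*stride = 12 elements. */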
9036
9037 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9038 build_int_cst (TREE_TYPE (stride_step), vf));
9039
9040 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9041
9042 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9043 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9044 create_iv (stride_base, ivstep, NULL,
9045 loop, &incr_gsi, insert_after,
9046 &offvar, NULL);
9047
9048 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9049
9050 running_off = offvar;
9051 alias_off = build_int_cst (ref_type, 0);
9052 int nloads = const_nunits;
9053 int lnel = 1;
9054 tree ltype = TREE_TYPE (vectype);
9055 tree lvectype = vectype;
9056 auto_vec<tree> dr_chain;
9057 if (memory_access_type == VMAT_STRIDED_SLP)
9058 {
9059 if (group_size < const_nunits)
9060 {
9061 /* First check if vec_init optab supports construction from vector
9062 elts directly. Otherwise avoid emitting a constructor of
9063 vector elements by performing the loads using an integer type
9064 of the same size, constructing a vector of those and then
9065 re-interpreting it as the original vector type. This avoids a
9066 huge runtime penalty due to the general inability to perform
9067 store forwarding from smaller stores to a larger load. */
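/* E.g. (illustration only): for a V8HI vectype with group_size 2, instead
of eight HImode loads feeding a V8HI constructor we can issue four 32-bit
loads, build a four-element vector of those and VIEW_CONVERT_EXPR the
result back to V8HI. */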
9068 tree ptype;
9069 tree vtype
9070 = vector_vector_composition_type (vectype,
9071 const_nunits / group_size,
9072 &ptype);
9073 if (vtype != NULL_TREE)
9074 {
9075 nloads = const_nunits / group_size;
9076 lnel = group_size;
9077 lvectype = vtype;
9078 ltype = ptype;
9079 }
9080 }
9081 else
9082 {
9083 nloads = 1;
9084 lnel = const_nunits;
9085 ltype = vectype;
9086 }
9087 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9088 }
9089 /* If the vectype is a single-element vector(1) scalar_type, load it as a whole. */
9090 else if (nloads == 1)
9091 ltype = vectype;
9092
9093 if (slp)
9094 {
9095 /* For SLP permutation support we need to load the whole group,
9096 not only the number of vector stmts the permutation result
9097 fits in. */
9098 if (slp_perm)
9099 {
9100 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9101 variable VF. */
9102 unsigned int const_vf = vf.to_constant ();
9103 ncopies = CEIL (group_size * const_vf, const_nunits);
9104 dr_chain.create (ncopies);
9105 }
9106 else
9107 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9108 }
9109 unsigned int group_el = 0;
9110 unsigned HOST_WIDE_INT
9111 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9112 for (j = 0; j < ncopies; j++)
9113 {
9114 if (nloads > 1)
9115 vec_alloc (v, nloads);
9116 gimple *new_stmt = NULL;
9117 for (i = 0; i < nloads; i++)
9118 {
9119 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9120 group_el * elsz + cst_offset);
9121 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9122 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9123 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9125 if (nloads > 1)
9126 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9127 gimple_assign_lhs (new_stmt));
9128
9129 group_el += lnel;
9130 if (! slp
9131 || group_el == group_size)
9132 {
9133 tree newoff = copy_ssa_name (running_off);
9134 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9135 running_off, stride_step);
9136 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9137
9138 running_off = newoff;
9139 group_el = 0;
9140 }
9141 }
9142 if (nloads > 1)
9143 {
9144 tree vec_inv = build_constructor (lvectype, v);
9145 new_temp = vect_init_vector (vinfo, stmt_info,
9146 vec_inv, lvectype, gsi);
9147 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9148 if (lvectype != vectype)
9149 {
9150 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9151 VIEW_CONVERT_EXPR,
9152 build1 (VIEW_CONVERT_EXPR,
9153 vectype, new_temp));
9154 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9155 }
9156 }
9157
9158 if (slp)
9159 {
9160 if (slp_perm)
9161 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9162 else
9163 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9164 }
9165 else
9166 {
9167 if (j == 0)
9168 *vec_stmt = new_stmt;
9169 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9170 }
9171 }
9172 if (slp_perm)
9173 {
9174 unsigned n_perms;
9175 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9176 false, &n_perms);
9177 }
9178 return true;
9179 }
9180
9181 if (memory_access_type == VMAT_GATHER_SCATTER
9182 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9183 grouped_load = false;
9184
9185 if (grouped_load)
9186 {
9187 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9188 group_size = DR_GROUP_SIZE (first_stmt_info);
9189 /* For SLP vectorization we directly vectorize a subchain
9190 without permutation. */
9191 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9192 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9193 /* For BB vectorization always use the first stmt to base
9194 the data ref pointer on. */
9195 if (bb_vinfo)
9196 first_stmt_info_for_drptr
9197 = vect_find_first_scalar_stmt_in_slp (slp_node);
9198
9199 /* Check if the chain of loads is already vectorized. */
9200 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9201 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9202 ??? But we can only do so if there is exactly one
9203 as we have no way to get at the rest. Leave the CSE
9204 opportunity alone.
9205 ??? With the group load eventually participating
9206 in multiple different permutations (having multiple
9207 slp nodes which refer to the same group) the CSE
9208 would even produce wrong code. See PR56270. */
9209 && !slp)
9210 {
9211 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9212 return true;
9213 }
9214 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9215 group_gap_adj = 0;
9216
9217 /* VEC_NUM is the number of vect stmts to be created for this group. */
9218 if (slp)
9219 {
9220 grouped_load = false;
9221 /* If an SLP permutation is from N elements to N elements, and
9222 if one vector holds a whole number of groups of N elements, we
9223 can load the inputs to the permutation in the same way as an
9224 unpermuted sequence. In other cases we need to load the
9225 whole group, not only the number of vector stmts the
9226 permutation result fits in. */
9227 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9228 if (slp_perm
9229 && (group_size != scalar_lanes
9230 || !multiple_p (nunits, group_size)))
9231 {
9232 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9233 variable VF; see vect_transform_slp_perm_load. */
9234 unsigned int const_vf = vf.to_constant ();
9235 unsigned int const_nunits = nunits.to_constant ();
9236 vec_num = CEIL (group_size * const_vf, const_nunits);
9237 group_gap_adj = vf * group_size - nunits * vec_num;
9238 }
9239 else
9240 {
9241 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9242 group_gap_adj
9243 = group_size - scalar_lanes;
9244 }
9245 }
9246 else
9247 vec_num = group_size;
9248
9249 ref_type = get_group_alias_ptr_type (first_stmt_info);
9250 }
9251 else
9252 {
9253 first_stmt_info = stmt_info;
9254 first_dr_info = dr_info;
9255 group_size = vec_num = 1;
9256 group_gap_adj = 0;
9257 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9258 if (slp)
9259 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9260 }
9261
9262 gcc_assert (alignment_support_scheme);
9263 vec_loop_masks *loop_masks
9264 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9265 ? &LOOP_VINFO_MASKS (loop_vinfo)
9266 : NULL);
9267 vec_loop_lens *loop_lens
9268 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9269 ? &LOOP_VINFO_LENS (loop_vinfo)
9270 : NULL);
9271
9272 /* We shouldn't use the length-based approach if the loop is fully masked. */
9273 gcc_assert (!loop_lens || !loop_masks);
9274
9275 /* Targets with load-lanes instructions must not require explicit
9276 realignment. vect_supportable_dr_alignment always returns either
9277 dr_aligned or dr_unaligned_supported for masked operations. */
9278 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9279 && !mask
9280 && !loop_masks)
9281 || alignment_support_scheme == dr_aligned
9282 || alignment_support_scheme == dr_unaligned_supported);
9283
9284 /* In case the vectorization factor (VF) is bigger than the number
9285 of elements that we can fit in a vectype (nunits), we have to generate
9286 more than one vector stmt - i.e - we need to "unroll" the
9287 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9288 from one copy of the vector stmt to the next, in the field
9289 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9290 stages to find the correct vector defs to be used when vectorizing
9291 stmts that use the defs of the current stmt. The example below
9292 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9293 need to create 4 vectorized stmts):
9294
9295 before vectorization:
9296 RELATED_STMT VEC_STMT
9297 S1: x = memref - -
9298 S2: z = x + 1 - -
9299
9300 step 1: vectorize stmt S1:
9301 We first create the vector stmt VS1_0, and, as usual, record a
9302 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9303 Next, we create the vector stmt VS1_1, and record a pointer to
9304 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9305 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9306 stmts and pointers:
9307 RELATED_STMT VEC_STMT
9308 VS1_0: vx0 = memref0 VS1_1 -
9309 VS1_1: vx1 = memref1 VS1_2 -
9310 VS1_2: vx2 = memref2 VS1_3 -
9311 VS1_3: vx3 = memref3 - -
9312 S1: x = load - VS1_0
9313 S2: z = x + 1 - -
9314 */
9315
9316 /* In case of interleaving (non-unit grouped access):
9317
9318 S1: x2 = &base + 2
9319 S2: x0 = &base
9320 S3: x1 = &base + 1
9321 S4: x3 = &base + 3
9322
9323 Vectorized loads are created in the order of memory accesses
9324 starting from the access of the first stmt of the chain:
9325
9326 VS1: vx0 = &base
9327 VS2: vx1 = &base + vec_size*1
9328 VS3: vx3 = &base + vec_size*2
9329 VS4: vx4 = &base + vec_size*3
9330
9331 Then permutation statements are generated:
9332
9333 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9334 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9335 ...
9336
9337 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9338 (the order of the data-refs in the output of vect_permute_load_chain
9339 corresponds to the order of scalar stmts in the interleaving chain - see
9340 the documentation of vect_permute_load_chain()).
9341 The generation of permutation stmts and recording them in
9342 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9343
9344 In case of both multiple types and interleaving, the vector loads and
9345 permutation stmts above are created for every copy. The result vector
9346 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9347 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9348
9349 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9350 on a target that supports unaligned accesses (dr_unaligned_supported)
9351 we generate the following code:
9352 p = initial_addr;
9353 indx = 0;
9354 loop {
9355 p = p + indx * vectype_size;
9356 vec_dest = *(p);
9357 indx = indx + 1;
9358 }
9359
9360 Otherwise, the data reference is potentially unaligned on a target that
9361 does not support unaligned accesses (dr_explicit_realign_optimized) -
9362 then generate the following code, in which the data in each iteration is
9363 obtained by two vector loads, one from the previous iteration, and one
9364 from the current iteration:
9365 p1 = initial_addr;
9366 msq_init = *(floor(p1))
9367 p2 = initial_addr + VS - 1;
9368 realignment_token = call target_builtin;
9369 indx = 0;
9370 loop {
9371 p2 = p2 + indx * vectype_size
9372 lsq = *(floor(p2))
9373 vec_dest = realign_load (msq, lsq, realignment_token)
9374 indx = indx + 1;
9375 msq = lsq;
9376 } */
9377
9378 /* If the misalignment remains the same throughout the execution of the
9379 loop, we can create the init_addr and permutation mask at the loop
9380 preheader. Otherwise, it needs to be created inside the loop.
9381 This can only occur when vectorizing memory accesses in the inner-loop
9382 nested within an outer-loop that is being vectorized. */
9383
9384 if (nested_in_vect_loop
9385 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9386 GET_MODE_SIZE (TYPE_MODE (vectype))))
9387 {
9388 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9389 compute_in_loop = true;
9390 }
9391
9392 bool diff_first_stmt_info
9393 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9394
9395 tree offset = NULL_TREE;
9396 if ((alignment_support_scheme == dr_explicit_realign_optimized
9397 || alignment_support_scheme == dr_explicit_realign)
9398 && !compute_in_loop)
9399 {
9400 /* If we have a different first_stmt_info, we can't set up realignment
9401 here, since we can't guarantee that first_stmt_info's DR has been
9402 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9403 it by the distance from first_stmt_info's DR as below. */
9404 if (!diff_first_stmt_info)
9405 msq = vect_setup_realignment (vinfo,
9406 first_stmt_info, gsi, &realignment_token,
9407 alignment_support_scheme, NULL_TREE,
9408 &at_loop);
9409 if (alignment_support_scheme == dr_explicit_realign_optimized)
9410 {
9411 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9412 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9413 size_one_node);
9414 gcc_assert (!first_stmt_info_for_drptr);
9415 }
9416 }
9417 else
9418 at_loop = loop;
9419
9420 if (!known_eq (poffset, 0))
9421 offset = (offset
9422 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9423 : size_int (poffset));
9424
9425 tree bump;
9426 tree vec_offset = NULL_TREE;
9427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9428 {
9429 aggr_type = NULL_TREE;
9430 bump = NULL_TREE;
9431 }
9432 else if (memory_access_type == VMAT_GATHER_SCATTER)
9433 {
9434 aggr_type = elem_type;
9435 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9436 &bump, &vec_offset);
9437 }
9438 else
9439 {
9440 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9441 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9442 else
9443 aggr_type = vectype;
9444 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9445 memory_access_type);
9446 }
9447
9448 vec<tree> vec_offsets = vNULL;
9449 auto_vec<tree> vec_masks;
9450 if (mask)
9451 {
9452 if (slp_node)
9453 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9454 &vec_masks);
9455 else
9456 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9457 &vec_masks, mask_vectype);
9458 }
9459 tree vec_mask = NULL_TREE;
9460 poly_uint64 group_elt = 0;
9461 for (j = 0; j < ncopies; j++)
9462 {
9463 /* 1. Create the vector or array pointer update chain. */
9464 if (j == 0)
9465 {
9466 bool simd_lane_access_p
9467 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9468 if (simd_lane_access_p
9469 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9470 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9471 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9472 && integer_zerop (DR_INIT (first_dr_info->dr))
9473 && alias_sets_conflict_p (get_alias_set (aggr_type),
9474 get_alias_set (TREE_TYPE (ref_type)))
9475 && (alignment_support_scheme == dr_aligned
9476 || alignment_support_scheme == dr_unaligned_supported))
9477 {
9478 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9479 dataref_offset = build_int_cst (ref_type, 0);
9480 }
9481 else if (diff_first_stmt_info)
9482 {
9483 dataref_ptr
9484 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9485 aggr_type, at_loop, offset, &dummy,
9486 gsi, &ptr_incr, simd_lane_access_p,
9487 bump);
9488 /* Adjust the pointer by the difference to first_stmt. */
9489 data_reference_p ptrdr
9490 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9491 tree diff
9492 = fold_convert (sizetype,
9493 size_binop (MINUS_EXPR,
9494 DR_INIT (first_dr_info->dr),
9495 DR_INIT (ptrdr)));
9496 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9497 stmt_info, diff);
9498 if (alignment_support_scheme == dr_explicit_realign)
9499 {
9500 msq = vect_setup_realignment (vinfo,
9501 first_stmt_info_for_drptr, gsi,
9502 &realignment_token,
9503 alignment_support_scheme,
9504 dataref_ptr, &at_loop);
9505 gcc_assert (!compute_in_loop);
9506 }
9507 }
9508 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9509 {
9510 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9511 slp_node, &gs_info, &dataref_ptr,
9512 &vec_offsets);
9513 }
9514 else
9515 dataref_ptr
9516 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9517 at_loop,
9518 offset, &dummy, gsi, &ptr_incr,
9519 simd_lane_access_p, bump);
9520 if (mask)
9521 vec_mask = vec_masks[0];
9522 }
9523 else
9524 {
9525 if (dataref_offset)
9526 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9527 bump);
9528 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9529 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9530 stmt_info, bump);
9531 if (mask)
9532 vec_mask = vec_masks[j];
9533 }
9534
9535 if (grouped_load || slp_perm)
9536 dr_chain.create (vec_num);
9537
9538 gimple *new_stmt = NULL;
9539 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9540 {
9541 tree vec_array;
9542
9543 vec_array = create_vector_array (vectype, vec_num);
9544
9545 tree final_mask = NULL_TREE;
9546 if (loop_masks)
9547 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9548 vectype, j);
9549 if (vec_mask)
9550 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9551 final_mask, vec_mask, gsi);
9552
9553 gcall *call;
9554 if (final_mask)
9555 {
9556 /* Emit:
9557 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9558 VEC_MASK). */
9559 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9560 tree alias_ptr = build_int_cst (ref_type, align);
9561 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9562 dataref_ptr, alias_ptr,
9563 final_mask);
9564 }
9565 else
9566 {
9567 /* Emit:
9568 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9569 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9570 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9571 }
9572 gimple_call_set_lhs (call, vec_array);
9573 gimple_call_set_nothrow (call, true);
9574 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9575 new_stmt = call;
9576
9577 /* Extract each vector into an SSA_NAME. */
9578 for (i = 0; i < vec_num; i++)
9579 {
9580 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9581 vec_array, i);
9582 dr_chain.quick_push (new_temp);
9583 }
9584
9585 /* Record the mapping between SSA_NAMEs and statements. */
9586 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9587
9588 /* Record that VEC_ARRAY is now dead. */
9589 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9590 }
9591 else
9592 {
9593 for (i = 0; i < vec_num; i++)
9594 {
9595 tree final_mask = NULL_TREE;
9596 if (loop_masks
9597 && memory_access_type != VMAT_INVARIANT)
9598 final_mask = vect_get_loop_mask (gsi, loop_masks,
9599 vec_num * ncopies,
9600 vectype, vec_num * j + i);
9601 if (vec_mask)
9602 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9603 final_mask, vec_mask, gsi);
9604
9605 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9606 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9607 gsi, stmt_info, bump);
9608
9609 /* 2. Create the vector-load in the loop. */
9610 switch (alignment_support_scheme)
9611 {
9612 case dr_aligned:
9613 case dr_unaligned_supported:
9614 {
9615 unsigned int misalign;
9616 unsigned HOST_WIDE_INT align;
9617
9618 if (memory_access_type == VMAT_GATHER_SCATTER
9619 && gs_info.ifn != IFN_LAST)
9620 {
9621 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9622 vec_offset = vec_offsets[vec_num * j + i];
9623 tree zero = build_zero_cst (vectype);
9624 tree scale = size_int (gs_info.scale);
9625 gcall *call;
9626 if (final_mask)
9627 call = gimple_build_call_internal
9628 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9629 vec_offset, scale, zero, final_mask);
9630 else
9631 call = gimple_build_call_internal
9632 (IFN_GATHER_LOAD, 4, dataref_ptr,
9633 vec_offset, scale, zero);
9634 gimple_call_set_nothrow (call, true);
9635 new_stmt = call;
9636 data_ref = NULL_TREE;
9637 break;
9638 }
9639 else if (memory_access_type == VMAT_GATHER_SCATTER)
9640 {
9641 /* Emulated gather-scatter. */
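/* Open-code the gather: extract each offset lane with BIT_FIELD_REF,
scale it and add it to the base pointer, load the scalar element, and
collect the elements into a CONSTRUCTOR of VECTYPE. */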
9642 gcc_assert (!final_mask);
9643 unsigned HOST_WIDE_INT const_nunits
9644 = nunits.to_constant ();
9645 unsigned HOST_WIDE_INT const_offset_nunits
9646 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9647 .to_constant ();
9648 vec<constructor_elt, va_gc> *ctor_elts;
9649 vec_alloc (ctor_elts, const_nunits);
9650 gimple_seq stmts = NULL;
9651 /* We support offset vectors with more elements
9652 than the data vector for now. */
9653 unsigned HOST_WIDE_INT factor
9654 = const_offset_nunits / const_nunits;
9655 vec_offset = vec_offsets[j / factor];
9656 unsigned elt_offset = (j % factor) * const_nunits;
9657 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9658 tree scale = size_int (gs_info.scale);
9659 align
9660 = get_object_alignment (DR_REF (first_dr_info->dr));
9661 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9662 align);
9663 for (unsigned k = 0; k < const_nunits; ++k)
9664 {
9665 tree boff = size_binop (MULT_EXPR,
9666 TYPE_SIZE (idx_type),
9667 bitsize_int
9668 (k + elt_offset));
9669 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9670 idx_type, vec_offset,
9671 TYPE_SIZE (idx_type),
9672 boff);
9673 idx = gimple_convert (&stmts, sizetype, idx);
9674 idx = gimple_build (&stmts, MULT_EXPR,
9675 sizetype, idx, scale);
9676 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9677 TREE_TYPE (dataref_ptr),
9678 dataref_ptr, idx);
9679 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9680 tree elt = make_ssa_name (TREE_TYPE (vectype));
9681 tree ref = build2 (MEM_REF, ltype, ptr,
9682 build_int_cst (ref_type, 0));
9683 new_stmt = gimple_build_assign (elt, ref);
9684 gimple_seq_add_stmt (&stmts, new_stmt);
9685 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9686 }
9687 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9688 new_stmt = gimple_build_assign (NULL_TREE,
9689 build_constructor
9690 (vectype, ctor_elts));
9691 data_ref = NULL_TREE;
9692 break;
9693 }
9694
9695 align =
9696 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9697 if (alignment_support_scheme == dr_aligned)
9698 misalign = 0;
9699 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9700 {
9701 align = dr_alignment
9702 (vect_dr_behavior (vinfo, first_dr_info));
9703 misalign = 0;
9704 }
9705 else
9706 misalign = misalignment;
9707 if (dataref_offset == NULL_TREE
9708 && TREE_CODE (dataref_ptr) == SSA_NAME)
9709 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9710 align, misalign);
9711 align = least_bit_hwi (misalign | align);
9712
9713 if (final_mask)
9714 {
9715 tree ptr = build_int_cst (ref_type,
9716 align * BITS_PER_UNIT);
9717 gcall *call
9718 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9719 dataref_ptr, ptr,
9720 final_mask);
9721 gimple_call_set_nothrow (call, true);
9722 new_stmt = call;
9723 data_ref = NULL_TREE;
9724 }
9725 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9726 {
9727 tree final_len
9728 = vect_get_loop_len (loop_vinfo, loop_lens,
9729 vec_num * ncopies,
9730 vec_num * j + i);
9731 tree ptr = build_int_cst (ref_type,
9732 align * BITS_PER_UNIT);
9733
9734 machine_mode vmode = TYPE_MODE (vectype);
9735 opt_machine_mode new_ovmode
9736 = get_len_load_store_mode (vmode, true);
9737 machine_mode new_vmode = new_ovmode.require ();
9738 tree qi_type = unsigned_intQI_type_node;
9739
9740 signed char biasval =
9741 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9742
9743 tree bias = build_int_cst (intQI_type_node, biasval);
9744
9745 gcall *call
9746 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9747 dataref_ptr, ptr,
9748 final_len, bias);
9749 gimple_call_set_nothrow (call, true);
9750 new_stmt = call;
9751 data_ref = NULL_TREE;
9752
9753 /* Need conversion if it's wrapped with VnQI. */
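/* E.g. (illustration only): if the target's len_load only operates on
byte vectors, get_len_load_store_mode returns an equal-sized VnQI mode
(say V16QI for V4SI); the call then produces the byte vector and we
VIEW_CONVERT_EXPR it back to VECTYPE below. */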
9754 if (vmode != new_vmode)
9755 {
9756 tree new_vtype
9757 = build_vector_type_for_mode (qi_type, new_vmode);
9758 tree var = vect_get_new_ssa_name (new_vtype,
9759 vect_simple_var);
9760 gimple_set_lhs (call, var);
9761 vect_finish_stmt_generation (vinfo, stmt_info, call,
9762 gsi);
9763 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9764 new_stmt
9765 = gimple_build_assign (vec_dest,
9766 VIEW_CONVERT_EXPR, op);
9767 }
9768 }
9769 else
9770 {
9771 tree ltype = vectype;
9772 tree new_vtype = NULL_TREE;
9773 unsigned HOST_WIDE_INT gap
9774 = DR_GROUP_GAP (first_stmt_info);
9775 unsigned int vect_align
9776 = vect_known_alignment_in_bytes (first_dr_info,
9777 vectype);
9778 unsigned int scalar_dr_size
9779 = vect_get_scalar_dr_size (first_dr_info);
9780 /* If there's no peeling for gaps but we have a gap
9781 with SLP loads, then load only the lower half of the
9782 vector. See get_group_load_store_type for
9783 when we apply this optimization. */
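/* E.g. (illustration only): with an 8-element vectype, group_size 8
and gap 4, only the lower half vector is loaded and the remaining
elements are filled with zeros by the CONSTRUCTOR built further below. */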
9784 if (slp
9785 && loop_vinfo
9786 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9787 && gap != 0
9788 && known_eq (nunits, (group_size - gap) * 2)
9789 && known_eq (nunits, group_size)
9790 && gap >= (vect_align / scalar_dr_size))
9791 {
9792 tree half_vtype;
9793 new_vtype
9794 = vector_vector_composition_type (vectype, 2,
9795 &half_vtype);
9796 if (new_vtype != NULL_TREE)
9797 ltype = half_vtype;
9798 }
9799 tree offset
9800 = (dataref_offset ? dataref_offset
9801 : build_int_cst (ref_type, 0));
9802 if (ltype != vectype
9803 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9804 {
9805 unsigned HOST_WIDE_INT gap_offset
9806 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9807 tree gapcst = build_int_cst (ref_type, gap_offset);
9808 offset = size_binop (PLUS_EXPR, offset, gapcst);
9809 }
9810 data_ref
9811 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9812 if (alignment_support_scheme == dr_aligned)
9813 ;
9814 else
9815 TREE_TYPE (data_ref)
9816 = build_aligned_type (TREE_TYPE (data_ref),
9817 align * BITS_PER_UNIT);
9818 if (ltype != vectype)
9819 {
9820 vect_copy_ref_info (data_ref,
9821 DR_REF (first_dr_info->dr));
9822 tree tem = make_ssa_name (ltype);
9823 new_stmt = gimple_build_assign (tem, data_ref);
9824 vect_finish_stmt_generation (vinfo, stmt_info,
9825 new_stmt, gsi);
9826 data_ref = NULL;
9827 vec<constructor_elt, va_gc> *v;
9828 vec_alloc (v, 2);
9829 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9830 {
9831 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9832 build_zero_cst (ltype));
9833 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9834 }
9835 else
9836 {
9837 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9838 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9839 build_zero_cst (ltype));
9840 }
9841 gcc_assert (new_vtype != NULL_TREE);
9842 if (new_vtype == vectype)
9843 new_stmt = gimple_build_assign (
9844 vec_dest, build_constructor (vectype, v));
9845 else
9846 {
9847 tree new_vname = make_ssa_name (new_vtype);
9848 new_stmt = gimple_build_assign (
9849 new_vname, build_constructor (new_vtype, v));
9850 vect_finish_stmt_generation (vinfo, stmt_info,
9851 new_stmt, gsi);
9852 new_stmt = gimple_build_assign (
9853 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9854 new_vname));
9855 }
9856 }
9857 }
9858 break;
9859 }
9860 case dr_explicit_realign:
9861 {
9862 tree ptr, bump;
9863
9864 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9865
9866 if (compute_in_loop)
9867 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9868 &realignment_token,
9869 dr_explicit_realign,
9870 dataref_ptr, NULL);
9871
9872 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9873 ptr = copy_ssa_name (dataref_ptr);
9874 else
9875 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9876 // For explicit realign the target alignment should be
9877 // known at compile time.
9878 unsigned HOST_WIDE_INT align =
9879 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9880 new_stmt = gimple_build_assign
9881 (ptr, BIT_AND_EXPR, dataref_ptr,
9882 build_int_cst
9883 (TREE_TYPE (dataref_ptr),
9884 -(HOST_WIDE_INT) align));
9885 vect_finish_stmt_generation (vinfo, stmt_info,
9886 new_stmt, gsi);
9887 data_ref
9888 = build2 (MEM_REF, vectype, ptr,
9889 build_int_cst (ref_type, 0));
9890 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9891 vec_dest = vect_create_destination_var (scalar_dest,
9892 vectype);
9893 new_stmt = gimple_build_assign (vec_dest, data_ref);
9894 new_temp = make_ssa_name (vec_dest, new_stmt);
9895 gimple_assign_set_lhs (new_stmt, new_temp);
9896 gimple_move_vops (new_stmt, stmt_info->stmt);
9897 vect_finish_stmt_generation (vinfo, stmt_info,
9898 new_stmt, gsi);
9899 msq = new_temp;
9900
9901 bump = size_binop (MULT_EXPR, vs,
9902 TYPE_SIZE_UNIT (elem_type));
9903 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9904 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9905 stmt_info, bump);
9906 new_stmt = gimple_build_assign
9907 (NULL_TREE, BIT_AND_EXPR, ptr,
9908 build_int_cst
9909 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9910 ptr = copy_ssa_name (ptr, new_stmt);
9911 gimple_assign_set_lhs (new_stmt, ptr);
9912 vect_finish_stmt_generation (vinfo, stmt_info,
9913 new_stmt, gsi);
9914 data_ref
9915 = build2 (MEM_REF, vectype, ptr,
9916 build_int_cst (ref_type, 0));
9917 break;
9918 }
9919 case dr_explicit_realign_optimized:
9920 {
9921 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9922 new_temp = copy_ssa_name (dataref_ptr);
9923 else
9924 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9925 // We should only be doing this if we know the target
9926 // alignment at compile time.
9927 unsigned HOST_WIDE_INT align =
9928 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9929 new_stmt = gimple_build_assign
9930 (new_temp, BIT_AND_EXPR, dataref_ptr,
9931 build_int_cst (TREE_TYPE (dataref_ptr),
9932 -(HOST_WIDE_INT) align));
9933 vect_finish_stmt_generation (vinfo, stmt_info,
9934 new_stmt, gsi);
9935 data_ref
9936 = build2 (MEM_REF, vectype, new_temp,
9937 build_int_cst (ref_type, 0));
9938 break;
9939 }
9940 default:
9941 gcc_unreachable ();
9942 }
9943 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9944 /* DATA_REF is null if we've already built the statement. */
9945 if (data_ref)
9946 {
9947 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9948 new_stmt = gimple_build_assign (vec_dest, data_ref);
9949 }
9950 new_temp = make_ssa_name (vec_dest, new_stmt);
9951 gimple_set_lhs (new_stmt, new_temp);
9952 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9953
9954 /* 3. Handle explicit realignment if necessary/supported.
9955 Create in loop:
9956 vec_dest = realign_load (msq, lsq, realignment_token) */
9957 if (alignment_support_scheme == dr_explicit_realign_optimized
9958 || alignment_support_scheme == dr_explicit_realign)
9959 {
9960 lsq = gimple_assign_lhs (new_stmt);
9961 if (!realignment_token)
9962 realignment_token = dataref_ptr;
9963 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9964 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9965 msq, lsq, realignment_token);
9966 new_temp = make_ssa_name (vec_dest, new_stmt);
9967 gimple_assign_set_lhs (new_stmt, new_temp);
9968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9969
9970 if (alignment_support_scheme == dr_explicit_realign_optimized)
9971 {
9972 gcc_assert (phi);
9973 if (i == vec_num - 1 && j == ncopies - 1)
9974 add_phi_arg (phi, lsq,
9975 loop_latch_edge (containing_loop),
9976 UNKNOWN_LOCATION);
9977 msq = lsq;
9978 }
9979 }
9980
9981 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9982 {
9983 tree perm_mask = perm_mask_for_reverse (vectype);
9984 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9985 perm_mask, stmt_info, gsi);
9986 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9987 }
9988
9989 /* Collect vector loads and later create their permutation in
9990 vect_transform_grouped_load (). */
9991 if (grouped_load || slp_perm)
9992 dr_chain.quick_push (new_temp);
9993
9994 /* Store vector loads in the corresponding SLP_NODE. */
9995 if (slp && !slp_perm)
9996 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9997
9998 /* With an SLP permutation we load the gaps as well; without
9999 one we need to skip the gaps once we have fully loaded
10000 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10001 group_elt += nunits;
10002 if (maybe_ne (group_gap_adj, 0U)
10003 && !slp_perm
10004 && known_eq (group_elt, group_size - group_gap_adj))
10005 {
10006 poly_wide_int bump_val
10007 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10008 * group_gap_adj);
10009 if (tree_int_cst_sgn
10010 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10011 bump_val = -bump_val;
10012 tree bump = wide_int_to_tree (sizetype, bump_val);
10013 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10014 gsi, stmt_info, bump);
10015 group_elt = 0;
10016 }
10017 }
10018 /* Bump the vector pointer to account for a gap or for excess
10019 elements loaded for a permuted SLP load. */
10020 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10021 {
10022 poly_wide_int bump_val
10023 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10024 * group_gap_adj);
10025 if (tree_int_cst_sgn
10026 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10027 bump_val = -bump_val;
10028 tree bump = wide_int_to_tree (sizetype, bump_val);
10029 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10030 stmt_info, bump);
10031 }
10032 }
10033
10034 if (slp && !slp_perm)
10035 continue;
10036
10037 if (slp_perm)
10038 {
10039 unsigned n_perms;
10040 /* For SLP we know we've seen all possible uses of dr_chain so
10041 direct vect_transform_slp_perm_load to DCE the unused parts.
10042 ??? This is a hack to prevent compile-time issues as seen
10043 in PR101120 and friends. */
10044 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10045 gsi, vf, false, &n_perms,
10046 nullptr, true);
10047 gcc_assert (ok);
10048 }
10049 else
10050 {
10051 if (grouped_load)
10052 {
10053 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10054 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10055 group_size, gsi);
10056 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10057 }
10058 else
10059 {
10060 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10061 }
10062 }
10063 dr_chain.release ();
10064 }
10065 if (!slp)
10066 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10067
10068 return true;
10069 }
10070
10071 /* Function vect_is_simple_cond.
10072
10073 Input:
10074 LOOP - the loop that is being vectorized.
10075 COND - Condition that is checked for simple use.
10076
10077 Output:
10078 *COMP_VECTYPE - the vector type for the comparison.
10079 *DTS - The def types for the arguments of the comparison
10080
10081 Returns whether a COND can be vectorized. Checks whether
10082 condition operands are supportable using vect_is_simple_use. */
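/* For illustration only: given "c_3 = a_1 < b_2 ? x_4 : y_5", COND is the
tree "a_1 < b_2"; *COMP_VECTYPE describes the vector type used for the
comparison of a_1 and b_2, while VECTYPE is the vector type of the
COND_EXPR's result. */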
10083
10084 static bool
10085 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10086 slp_tree slp_node, tree *comp_vectype,
10087 enum vect_def_type *dts, tree vectype)
10088 {
10089 tree lhs, rhs;
10090 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10091 slp_tree slp_op;
10092
10093 /* Mask case. */
10094 if (TREE_CODE (cond) == SSA_NAME
10095 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10096 {
10097 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10098 &slp_op, &dts[0], comp_vectype)
10099 || !*comp_vectype
10100 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10101 return false;
10102 return true;
10103 }
10104
10105 if (!COMPARISON_CLASS_P (cond))
10106 return false;
10107
10108 lhs = TREE_OPERAND (cond, 0);
10109 rhs = TREE_OPERAND (cond, 1);
10110
10111 if (TREE_CODE (lhs) == SSA_NAME)
10112 {
10113 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10114 &lhs, &slp_op, &dts[0], &vectype1))
10115 return false;
10116 }
10117 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10118 || TREE_CODE (lhs) == FIXED_CST)
10119 dts[0] = vect_constant_def;
10120 else
10121 return false;
10122
10123 if (TREE_CODE (rhs) == SSA_NAME)
10124 {
10125 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10126 &rhs, &slp_op, &dts[1], &vectype2))
10127 return false;
10128 }
10129 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10130 || TREE_CODE (rhs) == FIXED_CST)
10131 dts[1] = vect_constant_def;
10132 else
10133 return false;
10134
10135 if (vectype1 && vectype2
10136 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10137 TYPE_VECTOR_SUBPARTS (vectype2)))
10138 return false;
10139
10140 *comp_vectype = vectype1 ? vectype1 : vectype2;
10141 /* Invariant comparison. */
10142 if (! *comp_vectype)
10143 {
10144 tree scalar_type = TREE_TYPE (lhs);
10145 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10146 *comp_vectype = truth_type_for (vectype);
10147 else
10148 {
10149 /* If we can widen the comparison to match vectype do so. */
10150 if (INTEGRAL_TYPE_P (scalar_type)
10151 && !slp_node
10152 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10153 TYPE_SIZE (TREE_TYPE (vectype))))
10154 scalar_type = build_nonstandard_integer_type
10155 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10156 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10157 slp_node);
10158 }
10159 }
10160
10161 return true;
10162 }
10163
10164 /* vectorizable_condition.
10165
10166 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10167 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10168 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10169 at GSI.
10170
10171 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10172
10173 Return true if STMT_INFO is vectorizable in this way. */
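/* As a rough sketch (hypothetical SSA names), a scalar statement
     iftmp_1 = a_2 < b_3 ? c_4 : d_5;
   is, in the plain non-masked, non-reduction case, vectorized as
     vec_cmp_6 = vect_a < vect_b;
     vect_iftmp_7 = VEC_COND_EXPR <vec_cmp_6, vect_c, vect_d>;
   where vec_cmp_6 has the vector boolean type derived from the
   comparison's vector type.  */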
10174
10175 static bool
10176 vectorizable_condition (vec_info *vinfo,
10177 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10178 gimple **vec_stmt,
10179 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10180 {
10181 tree scalar_dest = NULL_TREE;
10182 tree vec_dest = NULL_TREE;
10183 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10184 tree then_clause, else_clause;
10185 tree comp_vectype = NULL_TREE;
10186 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10187 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10188 tree vec_compare;
10189 tree new_temp;
10190 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10191 enum vect_def_type dts[4]
10192 = {vect_unknown_def_type, vect_unknown_def_type,
10193 vect_unknown_def_type, vect_unknown_def_type};
10194 int ndts = 4;
10195 int ncopies;
10196 int vec_num;
10197 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10198 int i;
10199 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10200 vec<tree> vec_oprnds0 = vNULL;
10201 vec<tree> vec_oprnds1 = vNULL;
10202 vec<tree> vec_oprnds2 = vNULL;
10203 vec<tree> vec_oprnds3 = vNULL;
10204 tree vec_cmp_type;
10205 bool masked = false;
10206
10207 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10208 return false;
10209
10210 /* Is this a vectorizable conditional operation? */
10211 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10212 if (!stmt)
10213 return false;
10214
10215 code = gimple_assign_rhs_code (stmt);
10216 if (code != COND_EXPR)
10217 return false;
10218
10219 stmt_vec_info reduc_info = NULL;
10220 int reduc_index = -1;
10221 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10222 bool for_reduction
10223 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10224 if (for_reduction)
10225 {
10226 if (STMT_SLP_TYPE (stmt_info))
10227 return false;
10228 reduc_info = info_for_reduction (vinfo, stmt_info);
10229 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10230 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10231 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10232 || reduc_index != -1);
10233 }
10234 else
10235 {
10236 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10237 return false;
10238 }
10239
10240 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10241 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10242
10243 if (slp_node)
10244 {
10245 ncopies = 1;
10246 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10247 }
10248 else
10249 {
10250 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10251 vec_num = 1;
10252 }
10253
10254 gcc_assert (ncopies >= 1);
10255 if (for_reduction && ncopies > 1)
10256 return false; /* FORNOW */
10257
10258 cond_expr = gimple_assign_rhs1 (stmt);
10259
10260 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10261 &comp_vectype, &dts[0], vectype)
10262 || !comp_vectype)
10263 return false;
10264
10265 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10266 slp_tree then_slp_node, else_slp_node;
10267 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10268 &then_clause, &then_slp_node, &dts[2], &vectype1))
10269 return false;
10270 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10271 &else_clause, &else_slp_node, &dts[3], &vectype2))
10272 return false;
10273
10274 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10275 return false;
10276
10277 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10278 return false;
10279
10280 masked = !COMPARISON_CLASS_P (cond_expr);
10281 vec_cmp_type = truth_type_for (comp_vectype);
10282
10283 if (vec_cmp_type == NULL_TREE)
10284 return false;
10285
10286 cond_code = TREE_CODE (cond_expr);
10287 if (!masked)
10288 {
10289 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10290 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10291 }
10292
10293 /* For conditional reductions, the "then" value needs to be the candidate
10294 value calculated by this iteration while the "else" value needs to be
10295 the result carried over from previous iterations. If the COND_EXPR
10296 is the other way around, we need to swap it. */
10297 bool must_invert_cmp_result = false;
10298 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10299 {
10300 if (masked)
10301 must_invert_cmp_result = true;
10302 else
10303 {
10304 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10305 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10306 if (new_code == ERROR_MARK)
10307 must_invert_cmp_result = true;
10308 else
10309 {
10310 cond_code = new_code;
10311 /* Make sure we don't accidentally use the old condition. */
10312 cond_expr = NULL_TREE;
10313 }
10314 }
10315 std::swap (then_clause, else_clause);
10316 }
10317
10318 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10319 {
10320 /* Boolean values may have another representation in vectors
10321 and therefore we prefer bit operations over comparison for
10322 them (which also works for scalar masks). We store opcodes
10323 to use in bitop1 and bitop2. Statement is vectorized as
10324 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10325 depending on bitop1 and bitop2 arity. */
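/* Concretely, treating the mask operands a and b as 0/1 values,
   the switch below selects
     a >  b  ->  a & ~b
     a >= b  ->  a | ~b
     a <  b  ->  b & ~a
     a <= b  ->  b | ~a
     a != b  ->  a ^ b
     a == b  ->  ~(a ^ b)
   where the trailing BIT_NOT_EXPR for EQ_EXPR is realized at
   transform time by swapping the then/else clauses.  */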
10326 switch (cond_code)
10327 {
10328 case GT_EXPR:
10329 bitop1 = BIT_NOT_EXPR;
10330 bitop2 = BIT_AND_EXPR;
10331 break;
10332 case GE_EXPR:
10333 bitop1 = BIT_NOT_EXPR;
10334 bitop2 = BIT_IOR_EXPR;
10335 break;
10336 case LT_EXPR:
10337 bitop1 = BIT_NOT_EXPR;
10338 bitop2 = BIT_AND_EXPR;
10339 std::swap (cond_expr0, cond_expr1);
10340 break;
10341 case LE_EXPR:
10342 bitop1 = BIT_NOT_EXPR;
10343 bitop2 = BIT_IOR_EXPR;
10344 std::swap (cond_expr0, cond_expr1);
10345 break;
10346 case NE_EXPR:
10347 bitop1 = BIT_XOR_EXPR;
10348 break;
10349 case EQ_EXPR:
10350 bitop1 = BIT_XOR_EXPR;
10351 bitop2 = BIT_NOT_EXPR;
10352 break;
10353 default:
10354 return false;
10355 }
10356 cond_code = SSA_NAME;
10357 }
10358
10359 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10360 && reduction_type == EXTRACT_LAST_REDUCTION
10361 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10362 {
10363 if (dump_enabled_p ())
10364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10365 "reduction comparison operation not supported.\n");
10366 return false;
10367 }
10368
10369 if (!vec_stmt)
10370 {
10371 if (bitop1 != NOP_EXPR)
10372 {
10373 machine_mode mode = TYPE_MODE (comp_vectype);
10374 optab optab;
10375
10376 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10377 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10378 return false;
10379
10380 if (bitop2 != NOP_EXPR)
10381 {
10382 optab = optab_for_tree_code (bitop2, comp_vectype,
10383 optab_default);
10384 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10385 return false;
10386 }
10387 }
10388
10389 vect_cost_for_stmt kind = vector_stmt;
10390 if (reduction_type == EXTRACT_LAST_REDUCTION)
10391 /* Count one reduction-like operation per vector. */
10392 kind = vec_to_scalar;
10393 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10394 return false;
10395
10396 if (slp_node
10397 && (!vect_maybe_update_slp_op_vectype
10398 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10399 || (op_adjust == 1
10400 && !vect_maybe_update_slp_op_vectype
10401 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10402 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10403 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10404 {
10405 if (dump_enabled_p ())
10406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10407 "incompatible vector types for invariants\n");
10408 return false;
10409 }
10410
10411 if (loop_vinfo && for_reduction
10412 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10413 {
10414 if (reduction_type == EXTRACT_LAST_REDUCTION)
10415 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10416 ncopies * vec_num, vectype, NULL);
10417 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10418 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10419 {
10420 if (dump_enabled_p ())
10421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10422 "conditional reduction prevents the use"
10423 " of partial vectors.\n");
10424 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10425 }
10426 }
10427
10428 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10429 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10430 cost_vec, kind);
10431 return true;
10432 }
10433
10434 /* Transform. */
10435
10436 /* Handle def. */
10437 scalar_dest = gimple_assign_lhs (stmt);
10438 if (reduction_type != EXTRACT_LAST_REDUCTION)
10439 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10440
10441 bool swap_cond_operands = false;
10442
10443 /* See whether another part of the vectorized code applies a loop
10444 mask to the condition, or to its inverse. */
10445
10446 vec_loop_masks *masks = NULL;
10447 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10448 {
10449 if (reduction_type == EXTRACT_LAST_REDUCTION)
10450 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10451 else
10452 {
10453 scalar_cond_masked_key cond (cond_expr, ncopies);
10454 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10455 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10456 else
10457 {
10458 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10459 tree_code orig_code = cond.code;
10460 cond.code = invert_tree_comparison (cond.code, honor_nans);
10461 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10462 {
10463 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10464 cond_code = cond.code;
10465 swap_cond_operands = true;
10466 }
10467 else
10468 {
10469 /* Try the inverse of the current mask. We check if the
10470 inverse mask is live and, if so, generate a negation of
10471 the current mask so that we still honor NaNs. */
10472 cond.inverted_p = true;
10473 cond.code = orig_code;
10474 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10475 {
10476 bitop1 = orig_code;
10477 bitop2 = BIT_NOT_EXPR;
10478 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10479 cond_code = cond.code;
10480 swap_cond_operands = true;
10481 }
10482 }
10483 }
10484 }
10485 }
10486
10487 /* Handle cond expr. */
10488 if (masked)
10489 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10490 cond_expr, &vec_oprnds0, comp_vectype,
10491 then_clause, &vec_oprnds2, vectype,
10492 reduction_type != EXTRACT_LAST_REDUCTION
10493 ? else_clause : NULL, &vec_oprnds3, vectype);
10494 else
10495 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10496 cond_expr0, &vec_oprnds0, comp_vectype,
10497 cond_expr1, &vec_oprnds1, comp_vectype,
10498 then_clause, &vec_oprnds2, vectype,
10499 reduction_type != EXTRACT_LAST_REDUCTION
10500 ? else_clause : NULL, &vec_oprnds3, vectype);
10501
10502 /* Arguments are ready. Create the new vector stmt. */
10503 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10504 {
10505 vec_then_clause = vec_oprnds2[i];
10506 if (reduction_type != EXTRACT_LAST_REDUCTION)
10507 vec_else_clause = vec_oprnds3[i];
10508
10509 if (swap_cond_operands)
10510 std::swap (vec_then_clause, vec_else_clause);
10511
10512 if (masked)
10513 vec_compare = vec_cond_lhs;
10514 else
10515 {
10516 vec_cond_rhs = vec_oprnds1[i];
10517 if (bitop1 == NOP_EXPR)
10518 {
10519 gimple_seq stmts = NULL;
10520 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10521 vec_cond_lhs, vec_cond_rhs);
10522 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10523 }
10524 else
10525 {
10526 new_temp = make_ssa_name (vec_cmp_type);
10527 gassign *new_stmt;
10528 if (bitop1 == BIT_NOT_EXPR)
10529 new_stmt = gimple_build_assign (new_temp, bitop1,
10530 vec_cond_rhs);
10531 else
10532 new_stmt
10533 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10534 vec_cond_rhs);
10535 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10536 if (bitop2 == NOP_EXPR)
10537 vec_compare = new_temp;
10538 else if (bitop2 == BIT_NOT_EXPR)
10539 {
10540 /* Instead of doing ~x ? y : z do x ? z : y. */
10541 vec_compare = new_temp;
10542 std::swap (vec_then_clause, vec_else_clause);
10543 }
10544 else
10545 {
10546 vec_compare = make_ssa_name (vec_cmp_type);
10547 new_stmt
10548 = gimple_build_assign (vec_compare, bitop2,
10549 vec_cond_lhs, new_temp);
10550 vect_finish_stmt_generation (vinfo, stmt_info,
10551 new_stmt, gsi);
10552 }
10553 }
10554 }
10555
10556 /* If we decided to apply a loop mask to the result of the vector
10557 comparison, AND the comparison with the mask now. Later passes
10558 should then be able to reuse the AND results between multiple
10559 vector statements.
10560
10561 For example:
10562 for (int i = 0; i < 100; ++i)
10563 x[i] = y[i] ? z[i] : 10;
10564
10565 results in following optimized GIMPLE:
10566
10567 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10568 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10569 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10570 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10571 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10572 vect_iftmp.11_47, { 10, ... }>;
10573
10574 instead of using masked and unmasked forms of
10575 vec != { 0, ... } (masked in the MASK_LOAD,
10576 unmasked in the VEC_COND_EXPR). */
10577
10578 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10579 in cases where that's necessary. */
10580
10581 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10582 {
10583 if (!is_gimple_val (vec_compare))
10584 {
10585 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10586 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10587 vec_compare);
10588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10589 vec_compare = vec_compare_name;
10590 }
10591
10592 if (must_invert_cmp_result)
10593 {
10594 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10595 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10596 BIT_NOT_EXPR,
10597 vec_compare);
10598 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10599 vec_compare = vec_compare_name;
10600 }
10601
10602 if (masks)
10603 {
10604 tree loop_mask
10605 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10606 vectype, i);
10607 tree tmp2 = make_ssa_name (vec_cmp_type);
10608 gassign *g
10609 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10610 loop_mask);
10611 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10612 vec_compare = tmp2;
10613 }
10614 }
10615
10616 gimple *new_stmt;
10617 if (reduction_type == EXTRACT_LAST_REDUCTION)
10618 {
10619 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10620 tree lhs = gimple_get_lhs (old_stmt);
10621 new_stmt = gimple_build_call_internal
10622 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10623 vec_then_clause);
10624 gimple_call_set_lhs (new_stmt, lhs);
10625 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10626 if (old_stmt == gsi_stmt (*gsi))
10627 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10628 else
10629 {
10630 /* In this case we're moving the definition to later in the
10631 block. That doesn't matter because the only uses of the
10632 lhs are in phi statements. */
10633 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10634 gsi_remove (&old_gsi, true);
10635 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10636 }
10637 }
10638 else
10639 {
10640 new_temp = make_ssa_name (vec_dest);
10641 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10642 vec_then_clause, vec_else_clause);
10643 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10644 }
10645 if (slp_node)
10646 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10647 else
10648 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10649 }
10650
10651 if (!slp_node)
10652 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10653
10654 vec_oprnds0.release ();
10655 vec_oprnds1.release ();
10656 vec_oprnds2.release ();
10657 vec_oprnds3.release ();
10658
10659 return true;
10660 }
10661
10662 /* vectorizable_comparison.
10663
10664 Check if STMT_INFO is a comparison expression that can be vectorized.
10665 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10666 comparison, put it in VEC_STMT, and insert it at GSI.
10667
10668 Return true if STMT_INFO is vectorizable in this way. */
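/* For illustration (hypothetical SSA names), a scalar mask-producing
   statement
     cmp_1 = a_2 > b_3;
   becomes, when the target supports the vector comparison directly,
     vec_cmp_4 = vect_a > vect_b;
   with vec_cmp_4 of vector boolean (mask) type.  */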
10669
10670 static bool
10671 vectorizable_comparison (vec_info *vinfo,
10672 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10673 gimple **vec_stmt,
10674 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10675 {
10676 tree lhs, rhs1, rhs2;
10677 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10678 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10679 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10680 tree new_temp;
10681 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10682 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10683 int ndts = 2;
10684 poly_uint64 nunits;
10685 int ncopies;
10686 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10687 int i;
10688 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10689 vec<tree> vec_oprnds0 = vNULL;
10690 vec<tree> vec_oprnds1 = vNULL;
10691 tree mask_type;
10692 tree mask;
10693
10694 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10695 return false;
10696
10697 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10698 return false;
10699
10700 mask_type = vectype;
10701 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10702
10703 if (slp_node)
10704 ncopies = 1;
10705 else
10706 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10707
10708 gcc_assert (ncopies >= 1);
10709 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10710 return false;
10711
10712 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10713 if (!stmt)
10714 return false;
10715
10716 code = gimple_assign_rhs_code (stmt);
10717
10718 if (TREE_CODE_CLASS (code) != tcc_comparison)
10719 return false;
10720
10721 slp_tree slp_rhs1, slp_rhs2;
10722 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10723 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10724 return false;
10725
10726 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10727 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10728 return false;
10729
10730 if (vectype1 && vectype2
10731 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10732 TYPE_VECTOR_SUBPARTS (vectype2)))
10733 return false;
10734
10735 vectype = vectype1 ? vectype1 : vectype2;
10736
10737 /* Invariant comparison. */
10738 if (!vectype)
10739 {
10740 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10741 vectype = mask_type;
10742 else
10743 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10744 slp_node);
10745 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10746 return false;
10747 }
10748 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10749 return false;
10750
10751 /* Can't compare mask and non-mask types. */
10752 if (vectype1 && vectype2
10753 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10754 return false;
10755
10756 /* Boolean values may have another representation in vectors
10757 and therefore we prefer bit operations over comparison for
10758 them (which also works for scalar masks). We store opcodes
10759 to use in bitop1 and bitop2. Statement is vectorized as
10760 BITOP2 (rhs1 BITOP1 rhs2) or
10761 rhs1 BITOP2 (BITOP1 rhs2)
10762 depending on bitop1 and bitop2 arity. */
10763 bool swap_p = false;
10764 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10765 {
10766 if (code == GT_EXPR)
10767 {
10768 bitop1 = BIT_NOT_EXPR;
10769 bitop2 = BIT_AND_EXPR;
10770 }
10771 else if (code == GE_EXPR)
10772 {
10773 bitop1 = BIT_NOT_EXPR;
10774 bitop2 = BIT_IOR_EXPR;
10775 }
10776 else if (code == LT_EXPR)
10777 {
10778 bitop1 = BIT_NOT_EXPR;
10779 bitop2 = BIT_AND_EXPR;
10780 swap_p = true;
10781 }
10782 else if (code == LE_EXPR)
10783 {
10784 bitop1 = BIT_NOT_EXPR;
10785 bitop2 = BIT_IOR_EXPR;
10786 swap_p = true;
10787 }
10788 else
10789 {
10790 bitop1 = BIT_XOR_EXPR;
10791 if (code == EQ_EXPR)
10792 bitop2 = BIT_NOT_EXPR;
10793 }
10794 }
10795
10796 if (!vec_stmt)
10797 {
10798 if (bitop1 == NOP_EXPR)
10799 {
10800 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10801 return false;
10802 }
10803 else
10804 {
10805 machine_mode mode = TYPE_MODE (vectype);
10806 optab optab;
10807
10808 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10809 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10810 return false;
10811
10812 if (bitop2 != NOP_EXPR)
10813 {
10814 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10815 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10816 return false;
10817 }
10818 }
10819
10820 /* Put types on constant and invariant SLP children. */
10821 if (slp_node
10822 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10823 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10824 {
10825 if (dump_enabled_p ())
10826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10827 "incompatible vector types for invariants\n");
10828 return false;
10829 }
10830
10831 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10832 vect_model_simple_cost (vinfo, stmt_info,
10833 ncopies * (1 + (bitop2 != NOP_EXPR)),
10834 dts, ndts, slp_node, cost_vec);
10835 return true;
10836 }
10837
10838 /* Transform. */
10839
10840 /* Handle def. */
10841 lhs = gimple_assign_lhs (stmt);
10842 mask = vect_create_destination_var (lhs, mask_type);
10843
10844 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10845 rhs1, &vec_oprnds0, vectype,
10846 rhs2, &vec_oprnds1, vectype);
10847 if (swap_p)
10848 std::swap (vec_oprnds0, vec_oprnds1);
10849
10850 /* Arguments are ready. Create the new vector stmt. */
10851 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10852 {
10853 gimple *new_stmt;
10854 vec_rhs2 = vec_oprnds1[i];
10855
10856 new_temp = make_ssa_name (mask);
10857 if (bitop1 == NOP_EXPR)
10858 {
10859 new_stmt = gimple_build_assign (new_temp, code,
10860 vec_rhs1, vec_rhs2);
10861 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10862 }
10863 else
10864 {
10865 if (bitop1 == BIT_NOT_EXPR)
10866 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10867 else
10868 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10869 vec_rhs2);
10870 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10871 if (bitop2 != NOP_EXPR)
10872 {
10873 tree res = make_ssa_name (mask);
10874 if (bitop2 == BIT_NOT_EXPR)
10875 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10876 else
10877 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10878 new_temp);
10879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10880 }
10881 }
10882 if (slp_node)
10883 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10884 else
10885 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10886 }
10887
10888 if (!slp_node)
10889 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10890
10891 vec_oprnds0.release ();
10892 vec_oprnds1.release ();
10893
10894 return true;
10895 }
10896
10897 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10898 can handle all live statements in the node. Otherwise return true
10899 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10900 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10901
10902 static bool
10903 can_vectorize_live_stmts (vec_info *vinfo,
10904 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10905 slp_tree slp_node, slp_instance slp_node_instance,
10906 bool vec_stmt_p,
10907 stmt_vector_for_cost *cost_vec)
10908 {
10909 if (slp_node)
10910 {
10911 stmt_vec_info slp_stmt_info;
10912 unsigned int i;
10913 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10914 {
10915 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10916 && !vectorizable_live_operation (vinfo,
10917 slp_stmt_info, gsi, slp_node,
10918 slp_node_instance, i,
10919 vec_stmt_p, cost_vec))
10920 return false;
10921 }
10922 }
10923 else if (STMT_VINFO_LIVE_P (stmt_info)
10924 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10925 slp_node, slp_node_instance, -1,
10926 vec_stmt_p, cost_vec))
10927 return false;
10928
10929 return true;
10930 }
10931
10932 /* Make sure the statement is vectorizable. */
10933
10934 opt_result
10935 vect_analyze_stmt (vec_info *vinfo,
10936 stmt_vec_info stmt_info, bool *need_to_vectorize,
10937 slp_tree node, slp_instance node_instance,
10938 stmt_vector_for_cost *cost_vec)
10939 {
10940 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10941 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10942 bool ok;
10943 gimple_seq pattern_def_seq;
10944
10945 if (dump_enabled_p ())
10946 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10947 stmt_info->stmt);
10948
10949 if (gimple_has_volatile_ops (stmt_info->stmt))
10950 return opt_result::failure_at (stmt_info->stmt,
10951 "not vectorized:"
10952 " stmt has volatile operands: %G\n",
10953 stmt_info->stmt);
10954
10955 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10956 && node == NULL
10957 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10958 {
10959 gimple_stmt_iterator si;
10960
10961 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10962 {
10963 stmt_vec_info pattern_def_stmt_info
10964 = vinfo->lookup_stmt (gsi_stmt (si));
10965 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10966 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10967 {
10968 /* Analyze def stmt of STMT if it's a pattern stmt. */
10969 if (dump_enabled_p ())
10970 dump_printf_loc (MSG_NOTE, vect_location,
10971 "==> examining pattern def statement: %G",
10972 pattern_def_stmt_info->stmt);
10973
10974 opt_result res
10975 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10976 need_to_vectorize, node, node_instance,
10977 cost_vec);
10978 if (!res)
10979 return res;
10980 }
10981 }
10982 }
10983
10984 /* Skip stmts that do not need to be vectorized. In loops this is expected
10985 to include:
10986 - the COND_EXPR which is the loop exit condition
10987 - any LABEL_EXPRs in the loop
10988 - computations that are used only for array indexing or loop control.
10989 In basic blocks we only analyze statements that are part of some SLP
10990 instance; therefore, all the statements are relevant.
10991
10992 A pattern statement needs to be analyzed instead of the original statement
10993 if the original statement is not relevant. Otherwise, we analyze both
10994 statements. In basic blocks we are called from some SLP instance
10995 traversal; there we don't analyze pattern stmts instead, since the
10996 pattern stmts will already be part of an SLP instance. */
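/* For instance, a widening multiplication such as
     c_3 = (int) a_1 * (int) b_2;
   may have been replaced by pattern recognition with something like
     patt_4 = WIDEN_MULT_EXPR <a_1, b_2>;
   in which case patt_4's statement is the one analyzed here when the
   original statement is not relevant on its own.  */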
10997
10998 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10999 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11000 && !STMT_VINFO_LIVE_P (stmt_info))
11001 {
11002 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11003 && pattern_stmt_info
11004 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11005 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11006 {
11007 /* Analyze PATTERN_STMT instead of the original stmt. */
11008 stmt_info = pattern_stmt_info;
11009 if (dump_enabled_p ())
11010 dump_printf_loc (MSG_NOTE, vect_location,
11011 "==> examining pattern statement: %G",
11012 stmt_info->stmt);
11013 }
11014 else
11015 {
11016 if (dump_enabled_p ())
11017 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11018
11019 return opt_result::success ();
11020 }
11021 }
11022 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11023 && node == NULL
11024 && pattern_stmt_info
11025 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11026 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11027 {
11028 /* Analyze PATTERN_STMT too. */
11029 if (dump_enabled_p ())
11030 dump_printf_loc (MSG_NOTE, vect_location,
11031 "==> examining pattern statement: %G",
11032 pattern_stmt_info->stmt);
11033
11034 opt_result res
11035 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11036 node_instance, cost_vec);
11037 if (!res)
11038 return res;
11039 }
11040
11041 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11042 {
11043 case vect_internal_def:
11044 break;
11045
11046 case vect_reduction_def:
11047 case vect_nested_cycle:
11048 gcc_assert (!bb_vinfo
11049 && (relevance == vect_used_in_outer
11050 || relevance == vect_used_in_outer_by_reduction
11051 || relevance == vect_used_by_reduction
11052 || relevance == vect_unused_in_scope
11053 || relevance == vect_used_only_live));
11054 break;
11055
11056 case vect_induction_def:
11057 gcc_assert (!bb_vinfo);
11058 break;
11059
11060 case vect_constant_def:
11061 case vect_external_def:
11062 case vect_unknown_def_type:
11063 default:
11064 gcc_unreachable ();
11065 }
11066
11067 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11068 if (node)
11069 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11070
11071 if (STMT_VINFO_RELEVANT_P (stmt_info))
11072 {
11073 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11074 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11075 || (call && gimple_call_lhs (call) == NULL_TREE));
11076 *need_to_vectorize = true;
11077 }
11078
11079 if (PURE_SLP_STMT (stmt_info) && !node)
11080 {
11081 if (dump_enabled_p ())
11082 dump_printf_loc (MSG_NOTE, vect_location,
11083 "handled only by SLP analysis\n");
11084 return opt_result::success ();
11085 }
11086
11087 ok = true;
11088 if (!bb_vinfo
11089 && (STMT_VINFO_RELEVANT_P (stmt_info)
11090 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11091 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11092 -mveclibabi= takes preference over library functions with
11093 the simd attribute. */
11094 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11095 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11096 cost_vec)
11097 || vectorizable_conversion (vinfo, stmt_info,
11098 NULL, NULL, node, cost_vec)
11099 || vectorizable_operation (vinfo, stmt_info,
11100 NULL, NULL, node, cost_vec)
11101 || vectorizable_assignment (vinfo, stmt_info,
11102 NULL, NULL, node, cost_vec)
11103 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11104 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11105 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11106 node, node_instance, cost_vec)
11107 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11108 NULL, node, cost_vec)
11109 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11110 || vectorizable_condition (vinfo, stmt_info,
11111 NULL, NULL, node, cost_vec)
11112 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11113 cost_vec)
11114 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11115 stmt_info, NULL, node));
11116 else
11117 {
11118 if (bb_vinfo)
11119 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11120 || vectorizable_simd_clone_call (vinfo, stmt_info,
11121 NULL, NULL, node, cost_vec)
11122 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11123 cost_vec)
11124 || vectorizable_shift (vinfo, stmt_info,
11125 NULL, NULL, node, cost_vec)
11126 || vectorizable_operation (vinfo, stmt_info,
11127 NULL, NULL, node, cost_vec)
11128 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11129 cost_vec)
11130 || vectorizable_load (vinfo, stmt_info,
11131 NULL, NULL, node, cost_vec)
11132 || vectorizable_store (vinfo, stmt_info,
11133 NULL, NULL, node, cost_vec)
11134 || vectorizable_condition (vinfo, stmt_info,
11135 NULL, NULL, node, cost_vec)
11136 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11137 cost_vec)
11138 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11139 }
11140
11141 if (node)
11142 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11143
11144 if (!ok)
11145 return opt_result::failure_at (stmt_info->stmt,
11146 "not vectorized:"
11147 " relevant stmt not supported: %G",
11148 stmt_info->stmt);
11149
11150 /* Stmts that are (also) "live" (i.e., that are used out of the loop)
11151 need extra handling, except for vectorizable reductions. */
11152 if (!bb_vinfo
11153 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11154 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11155 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11156 stmt_info, NULL, node, node_instance,
11157 false, cost_vec))
11158 return opt_result::failure_at (stmt_info->stmt,
11159 "not vectorized:"
11160 " live stmt not supported: %G",
11161 stmt_info->stmt);
11162
11163 return opt_result::success ();
11164 }
11165
11166
11167 /* Function vect_transform_stmt.
11168
11169 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11170
11171 bool
11172 vect_transform_stmt (vec_info *vinfo,
11173 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11174 slp_tree slp_node, slp_instance slp_node_instance)
11175 {
11176 bool is_store = false;
11177 gimple *vec_stmt = NULL;
11178 bool done;
11179
11180 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11181
11182 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11183 if (slp_node)
11184 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11185
11186 switch (STMT_VINFO_TYPE (stmt_info))
11187 {
11188 case type_demotion_vec_info_type:
11189 case type_promotion_vec_info_type:
11190 case type_conversion_vec_info_type:
11191 done = vectorizable_conversion (vinfo, stmt_info,
11192 gsi, &vec_stmt, slp_node, NULL);
11193 gcc_assert (done);
11194 break;
11195
11196 case induc_vec_info_type:
11197 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11198 stmt_info, &vec_stmt, slp_node,
11199 NULL);
11200 gcc_assert (done);
11201 break;
11202
11203 case shift_vec_info_type:
11204 done = vectorizable_shift (vinfo, stmt_info,
11205 gsi, &vec_stmt, slp_node, NULL);
11206 gcc_assert (done);
11207 break;
11208
11209 case op_vec_info_type:
11210 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11211 NULL);
11212 gcc_assert (done);
11213 break;
11214
11215 case assignment_vec_info_type:
11216 done = vectorizable_assignment (vinfo, stmt_info,
11217 gsi, &vec_stmt, slp_node, NULL);
11218 gcc_assert (done);
11219 break;
11220
11221 case load_vec_info_type:
11222 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11223 NULL);
11224 gcc_assert (done);
11225 break;
11226
11227 case store_vec_info_type:
11228 done = vectorizable_store (vinfo, stmt_info,
11229 gsi, &vec_stmt, slp_node, NULL);
11230 gcc_assert (done);
11231 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11232 {
11233 /* In case of interleaving, the whole chain is vectorized when the
11234 last store in the chain is reached. Store stmts before the last
11235 one are skipped, and their vec_stmt_info shouldn't be freed
11236 meanwhile. */
11237 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11238 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11239 is_store = true;
11240 }
11241 else
11242 is_store = true;
11243 break;
11244
11245 case condition_vec_info_type:
11246 done = vectorizable_condition (vinfo, stmt_info,
11247 gsi, &vec_stmt, slp_node, NULL);
11248 gcc_assert (done);
11249 break;
11250
11251 case comparison_vec_info_type:
11252 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11253 slp_node, NULL);
11254 gcc_assert (done);
11255 break;
11256
11257 case call_vec_info_type:
11258 done = vectorizable_call (vinfo, stmt_info,
11259 gsi, &vec_stmt, slp_node, NULL);
11260 break;
11261
11262 case call_simd_clone_vec_info_type:
11263 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11264 slp_node, NULL);
11265 break;
11266
11267 case reduc_vec_info_type:
11268 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11269 gsi, &vec_stmt, slp_node);
11270 gcc_assert (done);
11271 break;
11272
11273 case cycle_phi_info_type:
11274 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11275 &vec_stmt, slp_node, slp_node_instance);
11276 gcc_assert (done);
11277 break;
11278
11279 case lc_phi_info_type:
11280 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11281 stmt_info, &vec_stmt, slp_node);
11282 gcc_assert (done);
11283 break;
11284
11285 case phi_info_type:
11286 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11287 gcc_assert (done);
11288 break;
11289
11290 default:
11291 if (!STMT_VINFO_LIVE_P (stmt_info))
11292 {
11293 if (dump_enabled_p ())
11294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11295 "stmt not supported.\n");
11296 gcc_unreachable ();
11297 }
11298 done = true;
11299 }
11300
11301 if (!slp_node && vec_stmt)
11302 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11303
11304 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11305 {
11306 /* Handle stmts whose DEF is used outside the loop-nest that is
11307 being vectorized. */
11308 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11309 slp_node_instance, true, NULL);
11310 gcc_assert (done);
11311 }
11312
11313 if (slp_node)
11314 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11315
11316 return is_store;
11317 }
11318
11319
11320 /* Remove a group of stores (for SLP or interleaving), free their
11321 stmt_vec_info. */
11322
11323 void
11324 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11325 {
11326 stmt_vec_info next_stmt_info = first_stmt_info;
11327
11328 while (next_stmt_info)
11329 {
11330 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11331 next_stmt_info = vect_orig_stmt (next_stmt_info);
11332 /* Free the attached stmt_vec_info and remove the stmt. */
11333 vinfo->remove_stmt (next_stmt_info);
11334 next_stmt_info = tmp;
11335 }
11336 }
11337
11338 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11339 elements of type SCALAR_TYPE, or null if the target doesn't support
11340 such a type.
11341
11342 If NUNITS is zero, return a vector type that contains elements of
11343 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11344
11345 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11346 for this vectorization region and want to "autodetect" the best choice.
11347 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11348 and we want the new type to be interoperable with it. PREVAILING_MODE
11349 in this case can be a scalar integer mode or a vector mode; when it
11350 is a vector mode, the function acts like a tree-level version of
11351 related_vector_mode. */
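/* For example (the exact modes depend on the target): with a VOIDmode
   PREVAILING_MODE, an `int' SCALAR_TYPE and zero NUNITS the result is
   whatever int vector the target prefers, say V4SI on a 128-bit SIMD
   target; with PREVAILING_MODE V16QImode and NUNITS of 4 the result
   on such a target would be the related 4-element vector type V4SI.  */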
11352
11353 tree
11354 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11355 tree scalar_type, poly_uint64 nunits)
11356 {
11357 tree orig_scalar_type = scalar_type;
11358 scalar_mode inner_mode;
11359 machine_mode simd_mode;
11360 tree vectype;
11361
11362 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11363 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11364 return NULL_TREE;
11365
11366 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11367
11368 /* For vector types of elements whose mode precision doesn't
11369 match their type's precision we use an element type of mode
11370 precision. The vectorization routines will have to make sure
11371 they support the proper result truncation/extension.
11372 We also make sure to build vector types with INTEGER_TYPE
11373 component type only. */
11374 if (INTEGRAL_TYPE_P (scalar_type)
11375 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11376 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11377 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11378 TYPE_UNSIGNED (scalar_type));
11379
11380 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11381 When the component mode passes the above test simply use a type
11382 corresponding to that mode. The theory is that any use that
11383 would cause problems with this will disable vectorization anyway. */
11384 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11385 && !INTEGRAL_TYPE_P (scalar_type))
11386 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11387
11388 /* We can't build a vector type of elements with alignment bigger than
11389 their size. */
11390 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11391 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11392 TYPE_UNSIGNED (scalar_type));
11393
11394 /* If we fell back to using the mode, fail if there was
11395 no scalar type for it. */
11396 if (scalar_type == NULL_TREE)
11397 return NULL_TREE;
11398
11399 /* If no prevailing mode was supplied, use the mode the target prefers.
11400 Otherwise look up a vector mode based on the prevailing mode. */
11401 if (prevailing_mode == VOIDmode)
11402 {
11403 gcc_assert (known_eq (nunits, 0U));
11404 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11405 if (SCALAR_INT_MODE_P (simd_mode))
11406 {
11407 /* Traditional behavior is not to take the integer mode
11408 literally, but simply to use it as a way of determining
11409 the vector size. It is up to mode_for_vector to decide
11410 what the TYPE_MODE should be.
11411
11412 Note that nunits == 1 is allowed in order to support single
11413 element vector types. */
11414 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11415 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11416 return NULL_TREE;
11417 }
11418 }
11419 else if (SCALAR_INT_MODE_P (prevailing_mode)
11420 || !related_vector_mode (prevailing_mode,
11421 inner_mode, nunits).exists (&simd_mode))
11422 {
11423 /* Fall back to using mode_for_vector, mostly in the hope of being
11424 able to use an integer mode. */
11425 if (known_eq (nunits, 0U)
11426 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11427 return NULL_TREE;
11428
11429 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11430 return NULL_TREE;
11431 }
11432
11433 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11434
11435 /* In cases where the mode was chosen by mode_for_vector, check that
11436 the target actually supports the chosen mode, or that it at least
11437 allows the vector mode to be replaced by a like-sized integer. */
11438 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11439 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11440 return NULL_TREE;
11441
11442 /* Re-attach the address-space qualifier if we canonicalized the scalar
11443 type. */
11444 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11445 return build_qualified_type
11446 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11447
11448 return vectype;
11449 }
11450
11451 /* Function get_vectype_for_scalar_type.
11452
11453 Returns the vector type corresponding to SCALAR_TYPE as supported
11454 by the target. If GROUP_SIZE is nonzero and we're performing BB
11455 vectorization, make sure that the number of elements in the vector
11456 is no bigger than GROUP_SIZE. */
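/* For example, during BB vectorization with GROUP_SIZE of 2 and a
   natural choice of V4SI for `int', the code below retries with an
   explicit two-element count and, assuming the target supports it,
   returns a V2SI vector type instead.  */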
11457
11458 tree
11459 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11460 unsigned int group_size)
11461 {
11462 /* For BB vectorization, we should always have a group size once we've
11463 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11464 are tentative requests during things like early data reference
11465 analysis and pattern recognition. */
11466 if (is_a <bb_vec_info> (vinfo))
11467 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11468 else
11469 group_size = 0;
11470
11471 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11472 scalar_type);
11473 if (vectype && vinfo->vector_mode == VOIDmode)
11474 vinfo->vector_mode = TYPE_MODE (vectype);
11475
11476 /* Register the natural choice of vector type, before the group size
11477 has been applied. */
11478 if (vectype)
11479 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11480
11481 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11482 try again with an explicit number of elements. */
11483 if (vectype
11484 && group_size
11485 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11486 {
11487 /* Start with the biggest number of units that fits within
11488 GROUP_SIZE and halve it until we find a valid vector type.
11489 Usually either the first attempt will succeed or all will
11490 fail (in the latter case because GROUP_SIZE is too small
11491 for the target), but it's possible that a target could have
11492 a hole between supported vector types.
11493
11494 If GROUP_SIZE is not a power of 2, this has the effect of
11495 trying the largest power of 2 that fits within the group,
11496 even though the group is not a multiple of that vector size.
11497 The BB vectorizer will then try to carve up the group into
11498 smaller pieces. */
11499 unsigned int nunits = 1 << floor_log2 (group_size);
11500 do
11501 {
11502 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11503 scalar_type, nunits);
11504 nunits /= 2;
11505 }
11506 while (nunits > 1 && !vectype);
11507 }
11508
11509 return vectype;
11510 }
11511
11512 /* Return the vector type corresponding to SCALAR_TYPE as supported
11513 by the target. NODE, if nonnull, is the SLP tree node that will
11514 use the returned vector type. */
11515
11516 tree
11517 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11518 {
11519 unsigned int group_size = 0;
11520 if (node)
11521 group_size = SLP_TREE_LANES (node);
11522 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11523 }
11524
11525 /* Function get_mask_type_for_scalar_type.
11526
11527 Returns the mask type corresponding to a result of comparison
11528 of vectors of specified SCALAR_TYPE as supported by target.
11529 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11530 make sure that the number of elements in the vector is no bigger
11531 than GROUP_SIZE. */
11532
11533 tree
11534 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11535 unsigned int group_size)
11536 {
11537 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11538
11539 if (!vectype)
11540 return NULL;
11541
11542 return truth_type_for (vectype);
11543 }
11544
11545 /* Function get_same_sized_vectype
11546
11547 Returns a vector type corresponding to SCALAR_TYPE of size
11548 VECTOR_TYPE if supported by the target. */
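/* For example, on a target with 128-bit vectors, a `short' SCALAR_TYPE
   and a V4SI VECTOR_TYPE yield V8HI: the element count is scaled so
   that the overall vector size stays the same.  */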
11549
11550 tree
11551 get_same_sized_vectype (tree scalar_type, tree vector_type)
11552 {
11553 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11554 return truth_type_for (vector_type);
11555
11556 poly_uint64 nunits;
11557 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11558 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11559 return NULL_TREE;
11560
11561 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11562 scalar_type, nunits);
11563 }
11564
11565 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11566 would not change the chosen vector modes. */
11567
11568 bool
11569 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11570 {
11571 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11572 i != vinfo->used_vector_modes.end (); ++i)
11573 if (!VECTOR_MODE_P (*i)
11574 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11575 return false;
11576 return true;
11577 }
11578
11579 /* Function vect_is_simple_use.
11580
11581 Input:
11582 VINFO - the vect info of the loop or basic block that is being vectorized.
11583 OPERAND - operand in the loop or bb.
11584 Output:
11585 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11586 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11587 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11588 the definition could be anywhere in the function
11589 DT - the type of definition
11590
11591 Returns whether a stmt with OPERAND can be vectorized.
11592 For loops, supportable operands are constants, loop invariants, and operands
11593 that are defined by the current iteration of the loop. Unsupportable
11594 operands are those that are defined by a previous iteration of the loop (as
11595 is the case in reduction/induction computations).
11596 For basic blocks, supportable operands are constants and bb invariants.
11597 For now, operands defined outside the basic block are not supported. */
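/* For example (hypothetical SSA names), in
     for (i = 0; i < n; i++)
       a[i] = b[i] * x_4 + 1;
   the operands of the multiply-add are all simple: the value loaded
   from b[i] is a vect_internal_def, the loop-invariant x_4 is a
   vect_external_def and the constant 1 is a vect_constant_def.  */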
11598
11599 bool
11600 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11601 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11602 {
11603 if (def_stmt_info_out)
11604 *def_stmt_info_out = NULL;
11605 if (def_stmt_out)
11606 *def_stmt_out = NULL;
11607 *dt = vect_unknown_def_type;
11608
11609 if (dump_enabled_p ())
11610 {
11611 dump_printf_loc (MSG_NOTE, vect_location,
11612 "vect_is_simple_use: operand ");
11613 if (TREE_CODE (operand) == SSA_NAME
11614 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11615 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11616 else
11617 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11618 }
11619
11620 if (CONSTANT_CLASS_P (operand))
11621 *dt = vect_constant_def;
11622 else if (is_gimple_min_invariant (operand))
11623 *dt = vect_external_def;
11624 else if (TREE_CODE (operand) != SSA_NAME)
11625 *dt = vect_unknown_def_type;
11626 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11627 *dt = vect_external_def;
11628 else
11629 {
11630 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11631 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11632 if (!stmt_vinfo)
11633 *dt = vect_external_def;
11634 else
11635 {
11636 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11637 def_stmt = stmt_vinfo->stmt;
11638 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11639 if (def_stmt_info_out)
11640 *def_stmt_info_out = stmt_vinfo;
11641 }
11642 if (def_stmt_out)
11643 *def_stmt_out = def_stmt;
11644 }
11645
11646 if (dump_enabled_p ())
11647 {
11648 dump_printf (MSG_NOTE, ", type of def: ");
11649 switch (*dt)
11650 {
11651 case vect_uninitialized_def:
11652 dump_printf (MSG_NOTE, "uninitialized\n");
11653 break;
11654 case vect_constant_def:
11655 dump_printf (MSG_NOTE, "constant\n");
11656 break;
11657 case vect_external_def:
11658 dump_printf (MSG_NOTE, "external\n");
11659 break;
11660 case vect_internal_def:
11661 dump_printf (MSG_NOTE, "internal\n");
11662 break;
11663 case vect_induction_def:
11664 dump_printf (MSG_NOTE, "induction\n");
11665 break;
11666 case vect_reduction_def:
11667 dump_printf (MSG_NOTE, "reduction\n");
11668 break;
11669 case vect_double_reduction_def:
11670 dump_printf (MSG_NOTE, "double reduction\n");
11671 break;
11672 case vect_nested_cycle:
11673 dump_printf (MSG_NOTE, "nested cycle\n");
11674 break;
11675 case vect_unknown_def_type:
11676 dump_printf (MSG_NOTE, "unknown\n");
11677 break;
11678 }
11679 }
11680
11681 if (*dt == vect_unknown_def_type)
11682 {
11683 if (dump_enabled_p ())
11684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11685 "Unsupported pattern.\n");
11686 return false;
11687 }
11688
11689 return true;
11690 }
11691
11692 /* Function vect_is_simple_use.
11693
11694 Same as vect_is_simple_use but also determines the vector operand
11695 type of OPERAND and stores it to *VECTYPE. If the definition of
11696 OPERAND is vect_uninitialized_def, vect_constant_def or
11697 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11698 is responsible for computing the best-suited vector type for the
11699 scalar operand. */
11700
11701 bool
11702 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11703 tree *vectype, stmt_vec_info *def_stmt_info_out,
11704 gimple **def_stmt_out)
11705 {
11706 stmt_vec_info def_stmt_info;
11707 gimple *def_stmt;
11708 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11709 return false;
11710
11711 if (def_stmt_out)
11712 *def_stmt_out = def_stmt;
11713 if (def_stmt_info_out)
11714 *def_stmt_info_out = def_stmt_info;
11715
11716 /* Now get a vector type if the def is internal, otherwise supply
11717 NULL_TREE and leave it up to the caller to figure out a proper
11718 type for the use stmt. */
11719 if (*dt == vect_internal_def
11720 || *dt == vect_induction_def
11721 || *dt == vect_reduction_def
11722 || *dt == vect_double_reduction_def
11723 || *dt == vect_nested_cycle)
11724 {
11725 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11726 gcc_assert (*vectype != NULL_TREE);
11727 if (dump_enabled_p ())
11728 dump_printf_loc (MSG_NOTE, vect_location,
11729 "vect_is_simple_use: vectype %T\n", *vectype);
11730 }
11731 else if (*dt == vect_uninitialized_def
11732 || *dt == vect_constant_def
11733 || *dt == vect_external_def)
11734 *vectype = NULL_TREE;
11735 else
11736 gcc_unreachable ();
11737
11738 return true;
11739 }
11740
11741 /* Function vect_is_simple_use.
11742
11743 Same as vect_is_simple_use but determines the operand by operand
11744 position OPERAND from either STMT or SLP_NODE, filling in *OP
11745 and *SLP_DEF (when SLP_NODE is not NULL). */
11746
11747 bool
11748 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11749 unsigned operand, tree *op, slp_tree *slp_def,
11750 enum vect_def_type *dt,
11751 tree *vectype, stmt_vec_info *def_stmt_info_out)
11752 {
11753 if (slp_node)
11754 {
11755 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11756 *slp_def = child;
11757 *vectype = SLP_TREE_VECTYPE (child);
11758 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11759 {
11760 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11761 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11762 }
11763 else
11764 {
11765 if (def_stmt_info_out)
11766 *def_stmt_info_out = NULL;
11767 *op = SLP_TREE_SCALAR_OPS (child)[0];
11768 *dt = SLP_TREE_DEF_TYPE (child);
11769 return true;
11770 }
11771 }
11772 else
11773 {
11774 *slp_def = NULL;
11775 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11776 {
11777 if (gimple_assign_rhs_code (ass) == COND_EXPR
11778 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11779 {
11780 if (operand < 2)
11781 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11782 else
11783 *op = gimple_op (ass, operand);
11784 }
11785 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11786 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11787 else
11788 *op = gimple_op (ass, operand + 1);
11789 }
11790 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11791 *op = gimple_call_arg (call, operand);
11792 else
11793 gcc_unreachable ();
11794 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11795 }
11796 }
11797
11798 /* If OP is not NULL and is external or constant update its vector
11799 type with VECTYPE. Returns true if successful or false if not,
11800 for example when conflicting vector types are present. */
11801
11802 bool
11803 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11804 {
11805 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11806 return true;
11807 if (SLP_TREE_VECTYPE (op))
11808 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11809 SLP_TREE_VECTYPE (op) = vectype;
11810 return true;
11811 }
11812
11813 /* Function supportable_widening_operation
11814
11815 Check whether an operation represented by the code CODE is a
11816 widening operation that is supported by the target platform in
11817 vector form (i.e., when operating on arguments of type VECTYPE_IN
11818 producing a result of type VECTYPE_OUT).
11819
11820 Widening operations we currently support are NOP (CONVERT), FLOAT,
11821 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11822 are supported by the target platform either directly (via vector
11823 tree-codes), or via target builtins.
11824
11825 Output:
11826 - CODE1 and CODE2 are codes of vector operations to be used when
11827 vectorizing the operation, if available.
11828 - MULTI_STEP_CVT determines the number of required intermediate steps in
11829 case of multi-step conversion (like char->short->int - in that case
11830 MULTI_STEP_CVT will be 1).
11831 - INTERM_TYPES contains the intermediate type required to perform the
11832 widening operation (short in the above example). */
11833
11834 bool
11835 supportable_widening_operation (vec_info *vinfo,
11836 enum tree_code code, stmt_vec_info stmt_info,
11837 tree vectype_out, tree vectype_in,
11838 enum tree_code *code1, enum tree_code *code2,
11839 int *multi_step_cvt,
11840 vec<tree> *interm_types)
11841 {
11842 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11843 class loop *vect_loop = NULL;
11844 machine_mode vec_mode;
11845 enum insn_code icode1, icode2;
11846 optab optab1, optab2;
11847 tree vectype = vectype_in;
11848 tree wide_vectype = vectype_out;
11849 enum tree_code c1, c2;
11850 int i;
11851 tree prev_type, intermediate_type;
11852 machine_mode intermediate_mode, prev_mode;
11853 optab optab3, optab4;
11854
11855 *multi_step_cvt = 0;
11856 if (loop_info)
11857 vect_loop = LOOP_VINFO_LOOP (loop_info);
11858
11859 switch (code)
11860 {
11861 case WIDEN_MULT_EXPR:
11862 /* The result of a vectorized widening operation usually requires
11863 two vectors (because the widened results do not fit into one vector).
11864 The generated vector results would normally be expected to be
11865 generated in the same order as in the original scalar computation,
11866 i.e. if 8 results are generated in each vector iteration, they are
11867 to be organized as follows:
11868 vect1: [res1,res2,res3,res4],
11869 vect2: [res5,res6,res7,res8].
11870
11871 However, in the special case that the result of the widening
11872 operation is used in a reduction computation only, the order doesn't
11873 matter (because when vectorizing a reduction we change the order of
11874 the computation). Some targets can take advantage of this and
11875 generate more efficient code. For example, targets like Altivec,
11876 that support widen_mult using a sequence of {mult_even,mult_odd}
11877 generate the following vectors:
11878 vect1: [res1,res3,res5,res7],
11879 vect2: [res2,res4,res6,res8].
11880
11881 When vectorizing outer-loops, we execute the inner-loop sequentially
11882 (each vectorized inner-loop iteration contributes to VF outer-loop
11883 iterations in parallel). We therefore don't allow changing the
11884 order of the computation in the inner-loop during outer-loop
11885 vectorization. */
11886 /* TODO: Another case in which order doesn't *really* matter is when we
11887 widen and then contract again, e.g. (short)((int)x * y >> 8).
11888 Normally, pack_trunc performs an even/odd permute, whereas the
11889 repack from an even/odd expansion would be an interleave, which
11890 would be significantly simpler for e.g. AVX2. */
11891 /* In any case, in order to avoid duplicating the code below, recurse
11892 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11893 are properly set up for the caller. If we fail, we'll continue with
11894 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11895 if (vect_loop
11896 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11897 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11898 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11899 stmt_info, vectype_out,
11900 vectype_in, code1, code2,
11901 multi_step_cvt, interm_types))
11902 {
11903 /* Elements in a vector with vect_used_by_reduction property cannot
11904 be reordered if the use chain with this property does not have the
11905 same operation. One such example is s += a * b, where elements
11906 in a and b cannot be reordered. Here we check if the vector defined
11907 by STMT is only directly used in the reduction statement. */
11908 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11909 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11910 if (use_stmt_info
11911 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11912 return true;
11913 }
11914 c1 = VEC_WIDEN_MULT_LO_EXPR;
11915 c2 = VEC_WIDEN_MULT_HI_EXPR;
11916 break;
11917
11918 case DOT_PROD_EXPR:
11919 c1 = DOT_PROD_EXPR;
11920 c2 = DOT_PROD_EXPR;
11921 break;
11922
11923 case SAD_EXPR:
11924 c1 = SAD_EXPR;
11925 c2 = SAD_EXPR;
11926 break;
11927
11928 case VEC_WIDEN_MULT_EVEN_EXPR:
11929 /* Support the recursion induced just above. */
11930 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11931 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11932 break;
11933
11934 case WIDEN_LSHIFT_EXPR:
11935 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11936 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11937 break;
11938
11939 case WIDEN_PLUS_EXPR:
11940 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11941 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11942 break;
11943
11944 case WIDEN_MINUS_EXPR:
11945 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11946 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11947 break;
11948
11949 CASE_CONVERT:
11950 c1 = VEC_UNPACK_LO_EXPR;
11951 c2 = VEC_UNPACK_HI_EXPR;
11952 break;
11953
11954 case FLOAT_EXPR:
11955 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11956 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11957 break;
11958
11959 case FIX_TRUNC_EXPR:
11960 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11961 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11962 break;
11963
11964 default:
11965 gcc_unreachable ();
11966 }
11967
11968 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11969 std::swap (c1, c2);
11970
11971 if (code == FIX_TRUNC_EXPR)
11972 {
11973 /* The signedness is determined from the output operand. */
11974 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11975 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11976 }
11977 else if (CONVERT_EXPR_CODE_P (code)
11978 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11979 && VECTOR_BOOLEAN_TYPE_P (vectype)
11980 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11981 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11982 {
11983 /* If the input and result modes are the same, a different optab
11984 is needed where we pass in the number of units in vectype. */
11985 optab1 = vec_unpacks_sbool_lo_optab;
11986 optab2 = vec_unpacks_sbool_hi_optab;
11987 }
11988 else
11989 {
11990 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11991 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11992 }
11993
11994 if (!optab1 || !optab2)
11995 return false;
11996
11997 vec_mode = TYPE_MODE (vectype);
11998 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11999 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12000 return false;
12001
12002 *code1 = c1;
12003 *code2 = c2;
12004
12005 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12006 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12007 {
12008 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12009 return true;
12010 /* For scalar masks we may have different boolean
12011 vector types having the same QImode. Thus we
12012 add an additional check on the number of elements. */
12013 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12014 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12015 return true;
12016 }
12017
12018 /* Check if it's a multi-step conversion that can be done using intermediate
12019 types. */
12020
12021 prev_type = vectype;
12022 prev_mode = vec_mode;
12023
12024 if (!CONVERT_EXPR_CODE_P (code))
12025 return false;
12026
12027 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12028 intermediate steps in the promotion sequence. We try
12029 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12030 not. */
12031 interm_types->create (MAX_INTERM_CVT_STEPS);
12032 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12033 {
12034 intermediate_mode = insn_data[icode1].operand[0].mode;
12035 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12036 intermediate_type
12037 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12038 else
12039 intermediate_type
12040 = lang_hooks.types.type_for_mode (intermediate_mode,
12041 TYPE_UNSIGNED (prev_type));
12042
12043 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12044 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12045 && intermediate_mode == prev_mode
12046 && SCALAR_INT_MODE_P (prev_mode))
12047 {
12048 /* If the input and result modes are the same, a different optab
12049 is needed where we pass in the number of units in vectype. */
12050 optab3 = vec_unpacks_sbool_lo_optab;
12051 optab4 = vec_unpacks_sbool_hi_optab;
12052 }
12053 else
12054 {
12055 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12056 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12057 }
12058
12059 if (!optab3 || !optab4
12060 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12061 || insn_data[icode1].operand[0].mode != intermediate_mode
12062 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12063 || insn_data[icode2].operand[0].mode != intermediate_mode
12064 || ((icode1 = optab_handler (optab3, intermediate_mode))
12065 == CODE_FOR_nothing)
12066 || ((icode2 = optab_handler (optab4, intermediate_mode))
12067 == CODE_FOR_nothing))
12068 break;
12069
12070 interm_types->quick_push (intermediate_type);
12071 (*multi_step_cvt)++;
12072
12073 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12074 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12075 {
12076 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12077 return true;
12078 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12079 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12080 return true;
12081 }
12082
12083 prev_type = intermediate_type;
12084 prev_mode = intermediate_mode;
12085 }
12086
12087 interm_types->release ();
12088 return false;
12089 }
12090
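As an editor-added illustration of the WIDEN_MULT_EXPR reordering discussed in
the comment inside the function above (not GCC code; lane numbering assumes a
little-endian layout, and the LO/HI codes are swapped for big-endian above),
the following stand-alone sketch computes the same eight widened products once
in the VEC_WIDEN_MULT_LO/HI order and once in the even/odd order, and checks
that only a full reduction is insensitive to the difference:

#include <array>
#include <cassert>
#include <cstdint>

/* Widened products of two 8-element char vectors, laid out either as the
   LO/HI pair (original scalar order) or as the EVEN/ODD pair.  */
struct widened_pair
{
  std::array<int32_t, 4> v1, v2;
};

static widened_pair
widen_mult_lo_hi (const std::array<int8_t, 8> &a,
                  const std::array<int8_t, 8> &b)
{
  widened_pair r;
  for (int i = 0; i < 4; ++i)
    {
      r.v1[i] = a[i] * b[i];            /* res1,res2,res3,res4 */
      r.v2[i] = a[i + 4] * b[i + 4];    /* res5,res6,res7,res8 */
    }
  return r;
}

static widened_pair
widen_mult_even_odd (const std::array<int8_t, 8> &a,
                     const std::array<int8_t, 8> &b)
{
  widened_pair r;
  for (int i = 0; i < 4; ++i)
    {
      r.v1[i] = a[2 * i] * b[2 * i];            /* res1,res3,res5,res7 */
      r.v2[i] = a[2 * i + 1] * b[2 * i + 1];    /* res2,res4,res6,res8 */
    }
  return r;
}

int
main ()
{
  std::array<int8_t, 8> a = { 1, 2, 3, 4, 5, 6, 7, 8 };
  std::array<int8_t, 8> b = { 8, 7, 6, 5, 4, 3, 2, 1 };

  widened_pair lohi = widen_mult_lo_hi (a, b);
  widened_pair evod = widen_mult_even_odd (a, b);

  /* The two schemes place the results in different lanes...  */
  assert (lohi.v1[1] == 2 * 7 && evod.v1[1] == 3 * 6);

  /* ...but a reduction that consumes every lane, e.g. s += a[i] * b[i],
     sees the same sum either way, which is why the even/odd form is only
     used when the widening result feeds a reduction.  */
  int sum_lohi = 0, sum_evod = 0;
  for (int i = 0; i < 4; ++i)
    {
      sum_lohi += lohi.v1[i] + lohi.v2[i];
      sum_evod += evod.v1[i] + evod.v2[i];
    }
  assert (sum_lohi == sum_evod);
  return 0;
}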
12091
12092 /* Function supportable_narrowing_operation
12093
12094 Check whether an operation represented by the code CODE is a
12095 narrowing operation that is supported by the target platform in
12096 vector form (i.e., when operating on arguments of type VECTYPE_IN
12097 and producing a result of type VECTYPE_OUT).
12098
12099 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12100 and FLOAT. This function checks if these operations are supported by
12101 the target platform directly via vector tree-codes.
12102
12103 Output:
12104 - CODE1 is the code of a vector operation to be used when
12105 vectorizing the operation, if available.
12106 - MULTI_STEP_CVT determines the number of required intermediate steps in
12107 case of multi-step conversion (like int->short->char - in that case
12108 MULTI_STEP_CVT will be 1).
12109 - INTERM_TYPES contains the intermediate type required to perform the
12110 narrowing operation (short in the above example). */
12111
12112 bool
12113 supportable_narrowing_operation (enum tree_code code,
12114 tree vectype_out, tree vectype_in,
12115 enum tree_code *code1, int *multi_step_cvt,
12116 vec<tree> *interm_types)
12117 {
12118 machine_mode vec_mode;
12119 enum insn_code icode1;
12120 optab optab1, interm_optab;
12121 tree vectype = vectype_in;
12122 tree narrow_vectype = vectype_out;
12123 enum tree_code c1;
12124 tree intermediate_type, prev_type;
12125 machine_mode intermediate_mode, prev_mode;
12126 int i;
12127 bool uns;
12128
12129 *multi_step_cvt = 0;
12130 switch (code)
12131 {
12132 CASE_CONVERT:
12133 c1 = VEC_PACK_TRUNC_EXPR;
12134 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12135 && VECTOR_BOOLEAN_TYPE_P (vectype)
12136 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12137 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12138 optab1 = vec_pack_sbool_trunc_optab;
12139 else
12140 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12141 break;
12142
12143 case FIX_TRUNC_EXPR:
12144 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12145 /* The signedness is determined from the output operand. */
12146 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12147 break;
12148
12149 case FLOAT_EXPR:
12150 c1 = VEC_PACK_FLOAT_EXPR;
12151 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12152 break;
12153
12154 default:
12155 gcc_unreachable ();
12156 }
12157
12158 if (!optab1)
12159 return false;
12160
12161 vec_mode = TYPE_MODE (vectype);
12162 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12163 return false;
12164
12165 *code1 = c1;
12166
12167 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12168 {
12169 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12170 return true;
12171 /* For scalar masks we may have different boolean
12172 vector types having the same QImode. Thus we
12173 add an additional check on the number of elements. */
12174 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12175 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12176 return true;
12177 }
12178
12179 if (code == FLOAT_EXPR)
12180 return false;
12181
12182 /* Check if it's a multi-step conversion that can be done using intermediate
12183 types. */
12184 prev_mode = vec_mode;
12185 prev_type = vectype;
12186 if (code == FIX_TRUNC_EXPR)
12187 uns = TYPE_UNSIGNED (vectype_out);
12188 else
12189 uns = TYPE_UNSIGNED (vectype);
12190
12191 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
12192 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
12193 costly than signed. */
12194 if (code == FIX_TRUNC_EXPR && uns)
12195 {
12196 enum insn_code icode2;
12197
12198 intermediate_type
12199 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12200 interm_optab
12201 = optab_for_tree_code (c1, intermediate_type, optab_default);
12202 if (interm_optab != unknown_optab
12203 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
12204 && insn_data[icode1].operand[0].mode
12205 == insn_data[icode2].operand[0].mode)
12206 {
12207 uns = false;
12208 optab1 = interm_optab;
12209 icode1 = icode2;
12210 }
12211 }
12212
12213 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12214 intermediate steps in the narrowing sequence. We try
12215 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12216 interm_types->create (MAX_INTERM_CVT_STEPS);
12217 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12218 {
12219 intermediate_mode = insn_data[icode1].operand[0].mode;
12220 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12221 intermediate_type
12222 = vect_double_mask_nunits (prev_type, intermediate_mode);
12223 else
12224 intermediate_type
12225 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12226 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12227 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12228 && intermediate_mode == prev_mode
12229 && SCALAR_INT_MODE_P (prev_mode))
12230 interm_optab = vec_pack_sbool_trunc_optab;
12231 else
12232 interm_optab
12233 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12234 optab_default);
12235 if (!interm_optab
12236 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12237 || insn_data[icode1].operand[0].mode != intermediate_mode
12238 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12239 == CODE_FOR_nothing))
12240 break;
12241
12242 interm_types->quick_push (intermediate_type);
12243 (*multi_step_cvt)++;
12244
12245 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12246 {
12247 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12248 return true;
12249 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12250 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12251 return true;
12252 }
12253
12254 prev_mode = intermediate_mode;
12255 prev_type = intermediate_type;
12256 optab1 = interm_optab;
12257 }
12258
12259 interm_types->release ();
12260 return false;
12261 }
12262
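In the opposite direction to the widening sketch earlier in this file, a
two-step narrowing int->short->char (MULTI_STEP_CVT == 1, INTERM_TYPES ==
{ short }) can be modelled with a VEC_PACK_TRUNC-style helper that truncates
two vectors and concatenates them into one vector of twice as many, half-width
lanes.  This is an editor-added illustration of the semantics only, not the
optab-based code above:

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

/* Model of VEC_PACK_TRUNC_EXPR: truncate the lanes of LO and HI and
   concatenate them into one vector with twice as many lanes.  */
template <typename NARROW, typename WIDE, std::size_t N>
static std::array<NARROW, 2 * N>
pack_trunc (const std::array<WIDE, N> &lo, const std::array<WIDE, N> &hi)
{
  std::array<NARROW, 2 * N> out;
  for (std::size_t i = 0; i < N; ++i)
    {
      out[i] = static_cast<NARROW> (lo[i]);
      out[N + i] = static_cast<NARROW> (hi[i]);
    }
  return out;
}

int
main ()
{
  /* Four "vectors" of two ints each; int->char needs an intermediate short
     step, so MULTI_STEP_CVT == 1 and INTERM_TYPES == { short }.  */
  std::array<int32_t, 2> a = { 1, 2 }, b = { 3, 4 }, c = { 5, 6 }, d = { 7, 8 };

  /* Step 1: int -> short, pairs of int vectors become short vectors.  */
  std::array<int16_t, 4> lo = pack_trunc<int16_t> (a, b);
  std::array<int16_t, 4> hi = pack_trunc<int16_t> (c, d);

  /* Step 2: short -> char, the two short vectors become one char vector.  */
  std::array<int8_t, 8> packed = pack_trunc<int8_t> (lo, hi);

  for (int i = 0; i < 8; ++i)
    assert (packed[i] == i + 1);   /* original element order preserved */
  return 0;
}
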
12263 /* Generate and return a vector mask of type MASK_TYPE such that
12264 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12265 Add the statements to SEQ. */
12266
12267 tree
12268 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12269 tree end_index, const char *name)
12270 {
12271 tree cmp_type = TREE_TYPE (start_index);
12272 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12273 cmp_type, mask_type,
12274 OPTIMIZE_FOR_SPEED));
12275 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12276 start_index, end_index,
12277 build_zero_cst (mask_type));
12278 tree tmp;
12279 if (name)
12280 tmp = make_temp_ssa_name (mask_type, NULL, name);
12281 else
12282 tmp = make_ssa_name (mask_type);
12283 gimple_call_set_lhs (call, tmp);
12284 gimple_seq_add_stmt (seq, call);
12285 return tmp;
12286 }
12287
12288 /* Generate a vector mask of type MASK_TYPE for which element I is false iff
12289 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12290
12291 tree
12292 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12293 tree end_index)
12294 {
12295 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12296 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12297 }
12298
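As a concrete reading of the two comments above: vect_gen_while emits a call
to IFN_WHILE_ULT and returns its mask, and vect_gen_while_not returns the
complement of that mask.  The following stand-alone sketch (an editor-added
illustration, not the internal function's expansion) evaluates both
definitions for one small case:

#include <array>
#include <cassert>
#include <cstddef>

/* mask[i] is true iff j + start_index < end_index for all j <= i, which for
   these unsigned indices is simply start_index + i < end_index.  */
template <std::size_t N>
static std::array<bool, N>
while_ult (unsigned start_index, unsigned end_index)
{
  std::array<bool, N> mask;
  for (std::size_t i = 0; i < N; ++i)
    mask[i] = start_index + i < end_index;
  return mask;
}

int
main ()
{
  /* 8-lane mask for a loop with 19 scalar iterations, third vector
     iteration: lanes for iterations 16, 17 and 18 stay active.  */
  std::array<bool, 8> mask = while_ult<8> (16, 19);
  std::array<bool, 8> not_mask;
  for (std::size_t i = 0; i < 8; ++i)
    not_mask[i] = !mask[i];   /* what vect_gen_while_not computes */

  assert (mask[0] && mask[1] && mask[2] && !mask[3] && !mask[7]);
  assert (!not_mask[0] && not_mask[3] && not_mask[7]);
  return 0;
}
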
12299 /* Try to compute the vector types required to vectorize STMT_INFO,
12300 returning true on success and false if vectorization isn't possible.
12301 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12302 make sure that the number of elements in the vectors is no bigger
12303 than GROUP_SIZE.
12304
12305 On success:
12306
12307 - Set *STMT_VECTYPE_OUT to:
12308 - NULL_TREE if the statement doesn't need to be vectorized;
12309 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12310
12311 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12312 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12313 statement does not help to determine the overall number of units. */
12314
12315 opt_result
12316 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12317 tree *stmt_vectype_out,
12318 tree *nunits_vectype_out,
12319 unsigned int group_size)
12320 {
12321 gimple *stmt = stmt_info->stmt;
12322
12323 /* For BB vectorization, we should always have a group size once we've
12324 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12325 are tentative requests during things like early data reference
12326 analysis and pattern recognition. */
12327 if (is_a <bb_vec_info> (vinfo))
12328 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12329 else
12330 group_size = 0;
12331
12332 *stmt_vectype_out = NULL_TREE;
12333 *nunits_vectype_out = NULL_TREE;
12334
12335 if (gimple_get_lhs (stmt) == NULL_TREE
12336 /* MASK_STORE has no lhs, but is ok. */
12337 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12338 {
12339 if (is_a <gcall *> (stmt))
12340 {
12341 /* Ignore calls with no lhs. These must be calls to
12342 #pragma omp simd functions, and what vectorization factor
12343 it really needs can't be determined until
12344 vectorizable_simd_clone_call. */
12345 if (dump_enabled_p ())
12346 dump_printf_loc (MSG_NOTE, vect_location,
12347 "defer to SIMD clone analysis.\n");
12348 return opt_result::success ();
12349 }
12350
12351 return opt_result::failure_at (stmt,
12352 "not vectorized: irregular stmt.%G", stmt);
12353 }
12354
12355 tree vectype;
12356 tree scalar_type = NULL_TREE;
12357 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12358 {
12359 vectype = STMT_VINFO_VECTYPE (stmt_info);
12360 if (dump_enabled_p ())
12361 dump_printf_loc (MSG_NOTE, vect_location,
12362 "precomputed vectype: %T\n", vectype);
12363 }
12364 else if (vect_use_mask_type_p (stmt_info))
12365 {
12366 unsigned int precision = stmt_info->mask_precision;
12367 scalar_type = build_nonstandard_integer_type (precision, 1);
12368 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12369 if (!vectype)
12370 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12371 " data-type %T\n", scalar_type);
12372 if (dump_enabled_p ())
12373 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12374 }
12375 else
12376 {
12377 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12378 scalar_type = TREE_TYPE (DR_REF (dr));
12379 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12380 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12381 else
12382 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12383
12384 if (dump_enabled_p ())
12385 {
12386 if (group_size)
12387 dump_printf_loc (MSG_NOTE, vect_location,
12388 "get vectype for scalar type (group size %d):"
12389 " %T\n", group_size, scalar_type);
12390 else
12391 dump_printf_loc (MSG_NOTE, vect_location,
12392 "get vectype for scalar type: %T\n", scalar_type);
12393 }
12394 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12395 if (!vectype)
12396 return opt_result::failure_at (stmt,
12397 "not vectorized:"
12398 " unsupported data-type %T\n",
12399 scalar_type);
12400
12401 if (dump_enabled_p ())
12402 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12403 }
12404
12405 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12406 return opt_result::failure_at (stmt,
12407 "not vectorized: vector stmt in loop:%G",
12408 stmt);
12409
12410 *stmt_vectype_out = vectype;
12411
12412 /* Don't try to compute scalar types if the stmt produces a boolean
12413 vector; use the existing vector type instead. */
12414 tree nunits_vectype = vectype;
12415 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12416 {
12417 /* The number of units is set according to the smallest scalar
12418 type (or the largest vector size, but we only support one
12419 vector size per vectorization). */
12420 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12421 TREE_TYPE (vectype));
12422 if (scalar_type != TREE_TYPE (vectype))
12423 {
12424 if (dump_enabled_p ())
12425 dump_printf_loc (MSG_NOTE, vect_location,
12426 "get vectype for smallest scalar type: %T\n",
12427 scalar_type);
12428 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12429 group_size);
12430 if (!nunits_vectype)
12431 return opt_result::failure_at
12432 (stmt, "not vectorized: unsupported data-type %T\n",
12433 scalar_type);
12434 if (dump_enabled_p ())
12435 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12436 nunits_vectype);
12437 }
12438 }
12439
12440 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12441 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12442 return opt_result::failure_at (stmt,
12443 "Not vectorized: Incompatible number "
12444 "of vector subparts between %T and %T\n",
12445 nunits_vectype, *stmt_vectype_out);
12446
12447 if (dump_enabled_p ())
12448 {
12449 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12450 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12451 dump_printf (MSG_NOTE, "\n");
12452 }
12453
12454 *nunits_vectype_out = nunits_vectype;
12455 return opt_result::success ();
12456 }
12457
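As a worked example of the two outputs documented above, take a hypothetical
conversion statement int_x = (int) char_y on a target with 128-bit vectors
(both the statement and the vector size are assumptions made for this
editor-added sketch).  The statement's own vectype holds ints, but the
smallest scalar type it references is char, so the nunits vectype holds 16
units; that larger unit count is what the multiple_p check above and the
later vectorization-factor computation consume.  A minimal sketch of the
arithmetic:

#include <cassert>
#include <cstdio>

/* Number of lanes a scalar type gets in an assumed fixed-size vector.  */
static unsigned
lanes (unsigned vector_bits, unsigned scalar_bits)
{
  return vector_bits / scalar_bits;
}

int
main ()
{
  const unsigned vector_bits = 128;   /* assumption: 128-bit vectors */

  /* int_x = (int) char_y: the lhs type is int...  */
  unsigned stmt_vectype_lanes = lanes (vector_bits, 32);    /* 4, like V4SI */
  /* ...but the smallest scalar type in the statement is char.  */
  unsigned nunits_vectype_lanes = lanes (vector_bits, 8);   /* 16, like V16QI */

  assert (stmt_vectype_lanes == 4 && nunits_vectype_lanes == 16);
  /* The nunits lane count is a multiple of the stmt lane count, mirroring
     the multiple_p check in vect_get_vector_types_for_stmt.  */
  assert (nunits_vectype_lanes % stmt_vectype_lanes == 0);
  printf ("stmt vectype lanes: %u, nunits vectype lanes: %u\n",
          stmt_vectype_lanes, nunits_vectype_lanes);
  return 0;
}
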
12458 /* Generate and return a statement sequence that sets vector length LEN to:
12459
12460 min_of_start_and_end = min (START_INDEX, END_INDEX);
12461 left_len = END_INDEX - min_of_start_and_end;
12462 rhs = min (left_len, LEN_LIMIT);
12463 LEN = rhs;
12464
12465 Note: the cost of the code generated by this function is modeled
12466 by vect_estimate_min_profitable_iters, so changes here may need
12467 corresponding changes there. */
12468
12469 gimple_seq
12470 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12471 {
12472 gimple_seq stmts = NULL;
12473 tree len_type = TREE_TYPE (len);
12474 gcc_assert (TREE_TYPE (start_index) == len_type);
12475
12476 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12477 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12478 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12479 gimple* stmt = gimple_build_assign (len, rhs);
12480 gimple_seq_add_stmt (&stmts, stmt);
12481
12482 return stmts;
12483 }
12484
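The sequence built by vect_gen_len reduces to
LEN = MIN (END_INDEX - MIN (START_INDEX, END_INDEX), LEN_LIMIT), where the
inner MIN only guards against START_INDEX having run past END_INDEX.  A small
stand-alone model of the same computation (editor-added; the values are chosen
purely for illustration):

#include <algorithm>
#include <cassert>

/* Scalar model of the statements emitted by vect_gen_len.  */
static unsigned
gen_len (unsigned start_index, unsigned end_index, unsigned len_limit)
{
  unsigned min_of_start_and_end = std::min (start_index, end_index);
  unsigned left_len = end_index - min_of_start_and_end;
  return std::min (left_len, len_limit);
}

int
main ()
{
  /* 19 scalar iterations, at most 8 lanes per vector iteration:
     the generated lengths are 8, 8 and then 3.  */
  assert (gen_len (0, 19, 8) == 8);
  assert (gen_len (8, 19, 8) == 8);
  assert (gen_len (16, 19, 8) == 3);
  /* If START_INDEX has already reached or passed END_INDEX, LEN is 0 rather
     than a huge unsigned difference, thanks to the inner MIN.  */
  assert (gen_len (24, 19, 8) == 0);
  return 0;
}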