gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
98 {
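  /* A load or store that belongs to a gather/scatter statement is costed
     as a gather load or scatter store rather than as a plain (un)aligned
     vector access.  */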
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
105
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
109
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 }
113
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
119 {
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
122 }
123
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
129 {
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
132 }
133
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
138 {
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
143 }
144
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
146
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 {
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
152 }
153
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
158
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
163 {
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
166
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
173
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
178
179 return vect_name;
180 }
181
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
185
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
190 {
191 tree array_ref;
192 gimple *new_stmt;
193
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
197
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
200 }
201
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
205
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
208 {
209 tree mem_ref;
210
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
215 }
216
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
219
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
223 {
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
227 }
228
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
230
231 /* Function vect_mark_relevant.
232
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
234
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
238 {
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
246
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249          may have their own uses that are not in any pattern; in such cases the
250          stmt itself should be marked.  */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252 {
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
257
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
267 }
268
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
272
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
275 {
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
280 }
281
282 worklist->safe_push (stmt_info);
283 }
284
285
286 /* Function is_simple_and_all_uses_invariant
287
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
289
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
293 {
294 tree op;
295 ssa_op_iter iter;
296
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
300
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
302 {
303 enum vect_def_type dt = vect_uninitialized_def;
304
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
306 {
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
311 }
312
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
315 }
316 return true;
317 }
318
319 /* Function vect_stmt_relevant_p.
320
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
323
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327    - it is a control stmt in the loop (except for the exit condition).
328
329 CHECKME: what other side effects would the vectorizer allow? */
330
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
334 {
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
340
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
343
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
348
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
353 {
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
358 }
359
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
362 {
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
364 {
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
367 {
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
370
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
374
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
379
380 *live_p = true;
381 }
382 }
383 }
384
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
387 {
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
392 }
393
394 return (*live_p || *relevant);
395 }
396
397
398 /* Function exist_non_indexing_operands_for_use_p
399
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
402
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
405 {
406 tree operand;
407
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
413
414      /* STMT has a data_ref.  FORNOW this means that it is of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
419
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
423
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
426
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
429 {
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
432 {
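	  /* For internal calls such as masked loads and stores, the mask
	     operand, the stored value and the gather/scatter offset are
	     genuine vector operands rather than address computation, so
	     USE must be vectorized if it is one of them.  */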
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
445 }
446 return false;
447 }
448
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
454
455 if (operand == use)
456 return true;
457
458 return false;
459 }
460
461
462 /*
463 Function process_use.
464
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
472
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483      we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
486
487 Return true if everything is as expected. Return false otherwise. */
488
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
493 {
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
496
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
501
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
506
507 if (!dstmt_vinfo)
508 return opt_result::success ();
509
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
512
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
521 {
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
527 }
528
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
537 {
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
541
542 switch (relevant)
543 {
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
548
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
553
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
558
559 case vect_used_in_scope:
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
569 ...
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
575 {
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
579
580 switch (relevant)
581 {
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
587
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
592
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
596
597 default:
598 gcc_unreachable ();
599 }
600 }
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
611 {
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
616 }
617
618
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
621 }
622
623
624 /* Function vect_mark_stmts_to_be_vectorized.
625
626 Not all stmts in the loop need to be vectorized. For example:
627
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
632
633 3. j = j + 1
634
635   Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
637
638 This pass detects such stmts. */
639
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
642 {
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
651
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
653
654 auto_vec<stmt_vec_info, 64> worklist;
655
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
658 {
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
661 {
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
666
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
669 }
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671 {
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
678
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
681 }
682 }
683
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
686 {
687 use_operand_p use_p;
688 ssa_op_iter iter;
689
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
694
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
702
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
710
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
712 {
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
722
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
730
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
738
739 default:
740 break;
741 }
742
743 if (is_pattern_stmt_p (stmt_vinfo))
744 {
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
749 {
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
752
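		  /* Operand 0 of the assignment is the LHS; scan only the RHS
		     operands, starting at operand 1 (or at operand 2 when the
		     COND_EXPR comparison operands were handled above).  */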
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
755 {
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
766 }
767 for (; i < gimple_num_ops (assign); i++)
768 {
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
771 {
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
777 }
778 }
779 }
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
781 {
782 for (i = 0; i < gimple_call_num_args (call); i++)
783 {
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
790 }
791 }
792 }
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
795 {
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
802 }
803
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 {
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
813 {
814 if (fatal)
815 *fatal = false;
816 return res;
817 }
818 }
819 } /* while worklist */
820
821 return opt_result::success ();
822 }
823
824 /* Function vect_model_simple_cost.
825
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
829
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
838 {
839 int inside_cost = 0, prologue_cost = 0;
840
841 gcc_assert (cost_vec != NULL);
842
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
846
847 if (!node)
848     /* Cost the "broadcast" of a scalar operand into a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
855
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
859
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
864 }
865
866
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
874
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
881 {
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
884
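  /* Each additional step in the promotion/demotion chain doubles the
     number of vector statements that are needed.  */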
885 for (i = 0; i < pwr + 1; i++)
886 {
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
892 }
893
894   /* FORNOW: Assuming maximum 2 args per stmt.  */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
899
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
904 }
905
906 /* Returns true if the current function returns DECL. */
907
908 static bool
909 cfun_returns (tree decl)
910 {
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
914 {
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
924 do
925 {
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
927 }
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
933 }
934 return false;
935 }
936
937 /* Function vect_model_store_cost
938
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
941
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
949 {
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
953
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
957
958 if (vls_type == VLS_STORE_INVARIANT)
959 {
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
963 }
964
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
969
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
974
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
981 {
982       /* Uses high and low interleave or shuffle operations for each
983 needed permute. */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
988
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
993 }
994
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
999 {
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1005 }
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1009
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1012 {
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1018 }
1019
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1029 {
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1033 {
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 	      /* Assume that a single reg-reg move is possible and cheap;
1036 		 do not account for vector to gp register move cost.  */
1037 if (nregs > 1)
1038 {
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1047 }
1048 }
1049 }
1050
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1055 }
1056
1057
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1065 {
1066 switch (alignment_support_scheme)
1067 {
1068 case dr_aligned:
1069 {
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1073
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1078 }
1079
1080 case dr_unaligned_supported:
1081 {
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1091 }
1092
1093 case dr_unaligned_unsupported:
1094 {
1095 *inside_cost = VECT_MAX_COST;
1096
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1101 }
1102
1103 default:
1104 gcc_unreachable ();
1105 }
1106 }
1107
1108
1109 /* Function vect_model_load_cost
1110
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1115
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1125 {
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1128
1129 gcc_assert (cost_vec);
1130
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1134
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1136 {
1137 /* If the load is permuted then the alignment is determined by
1138          the first group element, not by the first scalar stmt DR.  */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1146
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1150 }
1151
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1157
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1162
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1166 {
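      /* Walk the statements in the group; whatever remains of DR_GROUP_SIZE
	 afterwards is the number of loaded vectors that have no use.  */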
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168 stmt_vec_info next_stmt_info = first_stmt_info;
1169 do
1170 {
1171 gaps -= 1;
1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1173 }
1174 while (next_stmt_info);
1175 if (gaps)
1176 {
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1185 }
1186 }
1187
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1194 {
1195       /* Uses even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1201
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1206 }
1207
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1211 {
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load). */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1226 }
1227 else if (memory_access_type == VMAT_INVARIANT)
1228 {
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1236 }
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1253 }
1254
1255
1256 /* Calculate cost of DR's memory access. */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1266 {
1267 switch (alignment_support_scheme)
1268 {
1269 case dr_aligned:
1270 {
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1273
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1277
1278 break;
1279 }
1280 case dr_unaligned_supported:
1281 {
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1286
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1291
1292 break;
1293 }
1294 case dr_explicit_realign:
1295 {
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1300
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1307
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1311
1312 break;
1313 }
1314 case dr_explicit_realign_optimized:
1315 {
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1320
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1327
1328 if (add_realign_cost && record_prologue_costs)
1329 {
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1337 }
1338
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1343
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1348
1349 break;
1350 }
1351
1352 case dr_unaligned_unsupported:
1353 {
1354 *inside_cost = VECT_MAX_COST;
1355
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1360 }
1361
1362 default:
1363 gcc_unreachable ();
1364 }
1365 }
1366
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1369
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1373 {
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1378
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1382 }
1383
1384 /* Function vect_init_vector.
1385
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
1393
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1397 {
1398 gimple *init_stmt;
1399 tree new_temp;
1400
1401   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1403 {
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1406 {
1407 	      /* A scalar boolean value should be transformed into
1408 		 an all-zeros or all-ones value before building a vector.  */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1410 {
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1413
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1417 {
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1423 }
1424 }
1425 else
1426 {
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1437 {
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1441 }
1442 }
1443 }
1444 val = build_vector_from_val (type, val);
1445 }
1446
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
1451 }
1452
1453
1454 /* Function vect_get_vec_defs_for_operand.
1455
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1458
1459   If OP is an SSA_NAME that is defined in the loop, then
1460   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1461
1462   If OP is an invariant or constant, a new stmt that creates a vector def
1463   needs to be introduced.  VECTYPE may be used to specify a required type for
1464   the vector invariant.  */
1465
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1470 {
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1475
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1479
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1486
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1489 {
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1492
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1500
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1505 }
1506 else
1507 {
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1513 }
1514 }
1515
1516
1517 /* Get vectorized definitions for OP0, OP1, OP2 and OP3, as required.  */
1518
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1526 {
1527 if (slp_node)
1528 {
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1537 }
1538 else
1539 {
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1552 }
1553 }
1554
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1562 {
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
1568 }
1569
1570 /* Helper function called by vect_finish_replace_stmt and
1571    vect_finish_stmt_generation.  Set the location of the new
1572    statement and, if it can throw, add it to the containing EH region.  */
1573
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1577 {
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1580
1581 if (stmt_info)
1582 {
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1584
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1591 }
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1594 }
1595
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597    which sets the same scalar result as STMT_INFO did.  */
1599
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1603 {
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1606
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1609
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1611 }
1612
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614    before *GSI.  */
1615
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1620 {
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1622
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1625 {
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1629 {
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && (!(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1643 || (gimple_call_lhs (vec_stmt)
1644 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1645 {
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1649 }
1650 }
1651 }
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1654 }
1655
1656 /* We want to vectorize a call to combined function CFN with function
1657 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1658 as the types of all inputs. Check whether this is possible using
1659 an internal function, returning its code if so or IFN_LAST if not. */
1660
1661 static internal_fn
1662 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1663 tree vectype_out, tree vectype_in)
1664 {
1665 internal_fn ifn;
1666 if (internal_fn_p (cfn))
1667 ifn = as_internal_fn (cfn);
1668 else
1669 ifn = associated_internal_fn (fndecl);
1670 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1671 {
1672 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1673 if (info.vectorizable)
1674 {
1675 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1676 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1677 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1678 OPTIMIZE_FOR_SPEED))
1679 return ifn;
1680 }
1681 }
1682 return IFN_LAST;
1683 }
1684
1685
1686 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1687 gimple_stmt_iterator *);
1688
1689 /* Check whether a load or store statement in the loop described by
1690 LOOP_VINFO is possible in a loop using partial vectors. This is
1691 testing whether the vectorizer pass has the appropriate support,
1692 as well as whether the target does.
1693
1694 VLS_TYPE says whether the statement is a load or store and VECTYPE
1695 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1696 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1697 says how the load or store is going to be implemented and GROUP_SIZE
1698 is the number of load or store statements in the containing group.
1699 If the access is a gather load or scatter store, GS_INFO describes
1700 its arguments. If the load or store is conditional, SCALAR_MASK is the
1701 condition under which it occurs.
1702
1703 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1704 vectors is not supported, otherwise record the required rgroup control
1705 types. */
1706
1707 static void
1708 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1709 slp_tree slp_node,
1710 vec_load_store_type vls_type,
1711 int group_size,
1712 vect_memory_access_type
1713 memory_access_type,
1714 gather_scatter_info *gs_info,
1715 tree scalar_mask)
1716 {
1717 /* Invariant loads need no special support. */
1718 if (memory_access_type == VMAT_INVARIANT)
1719 return;
1720
1721 unsigned int nvectors;
1722 if (slp_node)
1723 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1724 else
1725 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1726
1727 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1728 machine_mode vecmode = TYPE_MODE (vectype);
1729 bool is_load = (vls_type == VLS_LOAD);
1730 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1731 {
1732 if (is_load
1733 ? !vect_load_lanes_supported (vectype, group_size, true)
1734 : !vect_store_lanes_supported (vectype, group_size, true))
1735 {
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1738 "can't operate on partial vectors because"
1739 " the target doesn't have an appropriate"
1740 " load/store-lanes instruction.\n");
1741 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1742 return;
1743 }
1744 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1745 scalar_mask);
1746 return;
1747 }
1748
1749 if (memory_access_type == VMAT_GATHER_SCATTER)
1750 {
1751 internal_fn ifn = (is_load
1752 ? IFN_MASK_GATHER_LOAD
1753 : IFN_MASK_SCATTER_STORE);
1754 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1755 gs_info->memory_type,
1756 gs_info->offset_vectype,
1757 gs_info->scale))
1758 {
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1761 "can't operate on partial vectors because"
1762 " the target doesn't have an appropriate"
1763 " gather load or scatter store instruction.\n");
1764 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1765 return;
1766 }
1767 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1768 scalar_mask);
1769 return;
1770 }
1771
1772 if (memory_access_type != VMAT_CONTIGUOUS
1773 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1774 {
1775 /* Element X of the data must come from iteration i * VF + X of the
1776 scalar loop. We need more work to support other mappings. */
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "can't operate on partial vectors because an"
1780 " access isn't contiguous.\n");
1781 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1782 return;
1783 }
1784
1785 if (!VECTOR_MODE_P (vecmode))
1786 {
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors when emulating"
1790 " vector operations.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 return;
1793 }
1794
1795 /* We might load more scalars than we need for permuting SLP loads.
1796 We checked in get_group_load_store_type that the extra elements
1797 don't leak into a new vector. */
1798 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1799 {
1800 unsigned int nvectors;
1801 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1802 return nvectors;
1803 gcc_unreachable ();
1804 };
1805
1806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1807 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1808 machine_mode mask_mode;
1809 bool using_partial_vectors_p = false;
1810 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1811 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1812 {
1813 nvectors = group_memory_nvectors (group_size * vf, nunits);
1814 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1815 using_partial_vectors_p = true;
1816 }
1817
1818 machine_mode vmode;
1819 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1820 {
1821 nvectors = group_memory_nvectors (group_size * vf, nunits);
1822 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1823 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1824 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1825 using_partial_vectors_p = true;
1826 }
1827
1828 if (!using_partial_vectors_p)
1829 {
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "can't operate on partial vectors because the"
1833 " target doesn't have the appropriate partial"
1834 " vectorization load or store.\n");
1835 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1836 }
1837 }
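
/* As a rough illustration (assuming a target with 4-lane predicated
   vectors), vectorizing

       for (int i = 0; i < n; i++)
         a[i] = b[i] + c[i];

   with partial vectors avoids a scalar epilogue: every load and store is
   issued under a loop control that is all-true except in the final vector
   iteration, along the lines of

       vect_b = .MASK_LOAD (&b[i], align, loop_mask);
       vect_c = .MASK_LOAD (&c[i], align, loop_mask);
       .MASK_STORE (&a[i], align, loop_mask, vect_b + vect_c);

   The function above records the rgroup controls (masks or lengths) needed
   to generate such code, or clears LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P
   if it cannot.  */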
1838
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1843 otherwise return VEC_MASK & LOOP_MASK.
1844
1845 MASK_TYPE is the type of both masks. If new statements are needed,
1846 insert them before GSI. */
1847
1848 static tree
1849 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1850 tree vec_mask, gimple_stmt_iterator *gsi)
1851 {
1852 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1853 if (!loop_mask)
1854 return vec_mask;
1855
1856 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1857
1858 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1859 return vec_mask;
1860
1861 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1862 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1863 vec_mask, loop_mask);
1864
1865 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1866 return and_res;
1867 }
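
/* For instance, when vectorizing a conditional store such as

       for (int i = 0; i < n; i++)
         if (b[i] > 0)
           a[i] = b[i];

   in a loop using partial vectors, VEC_MASK is the vectorized "b[i] > 0"
   condition and LOOP_MASK is the control for the current iteration, so the
   store must be predicated on

       vec_mask_and = vec_mask & loop_mask;

   which is the statement built above (skipped when vec_cond_masked_set
   records that VEC_MASK is already limited to LOOP_MASK).  */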
1868
1869 /* Determine whether we can use a gather load or scatter store to vectorize
1870 strided load or store STMT_INFO by truncating the current offset to a
1871 smaller width. We need to be able to construct an offset vector:
1872
1873 { 0, X, X*2, X*3, ... }
1874
1875 without loss of precision, where X is STMT_INFO's DR_STEP.
1876
1877 Return true if this is possible, describing the gather load or scatter
1878 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1879
1880 static bool
1881 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1882 loop_vec_info loop_vinfo, bool masked_p,
1883 gather_scatter_info *gs_info)
1884 {
1885 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1886 data_reference *dr = dr_info->dr;
1887 tree step = DR_STEP (dr);
1888 if (TREE_CODE (step) != INTEGER_CST)
1889 {
1890 /* ??? Perhaps we could use range information here? */
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_NOTE, vect_location,
1893 "cannot truncate variable step.\n");
1894 return false;
1895 }
1896
1897 /* Get the number of bits in an element. */
1898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1899 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1900 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1901
1902 /* Set COUNT to the upper limit on the number of elements - 1.
1903 Start with the maximum vectorization factor. */
1904 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1905
1906 /* Try lowering COUNT to the number of scalar latch iterations. */
1907 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1908 widest_int max_iters;
1909 if (max_loop_iterations (loop, &max_iters)
1910 && max_iters < count)
1911 count = max_iters.to_shwi ();
1912
1913 /* Try scales of 1 and the element size. */
1914 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1915 wi::overflow_type overflow = wi::OVF_NONE;
1916 for (int i = 0; i < 2; ++i)
1917 {
1918 int scale = scales[i];
1919 widest_int factor;
1920 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1921 continue;
1922
1923 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1924 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1925 if (overflow)
1926 continue;
1927 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1928 unsigned int min_offset_bits = wi::min_precision (range, sign);
1929
1930 /* Find the narrowest viable offset type. */
1931 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1932 tree offset_type = build_nonstandard_integer_type (offset_bits,
1933 sign == UNSIGNED);
1934
1935 /* See whether the target supports the operation with an offset
1936 no narrower than OFFSET_TYPE. */
1937 tree memory_type = TREE_TYPE (DR_REF (dr));
1938 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1939 vectype, memory_type, offset_type, scale,
1940 &gs_info->ifn, &gs_info->offset_vectype)
1941 || gs_info->ifn == IFN_LAST)
1942 continue;
1943
1944 gs_info->decl = NULL_TREE;
1945 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1946 but we don't need to store that here. */
1947 gs_info->base = NULL_TREE;
1948 gs_info->element_type = TREE_TYPE (vectype);
1949 gs_info->offset = fold_convert (offset_type, step);
1950 gs_info->offset_dt = vect_constant_def;
1951 gs_info->scale = scale;
1952 gs_info->memory_type = memory_type;
1953 return true;
1954 }
1955
1956 if (overflow && dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "truncating gather/scatter offset to %d bits"
1959 " might change its value.\n", element_bits);
1960
1961 return false;
1962 }
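
/* Worked example (numbers are illustrative): with DR_STEP == 4 bytes,
   4-byte elements and at most 255 scalar iterations, COUNT is at most 255.
   Trying SCALE == 4 gives a factor of 1, so the range COUNT * 1 fits in an
   unsigned 8-bit offset type and the access can become a gather with
   offsets { 0, 1, 2, 3, ... } and scale 4, provided the target supports
   that combination.  */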
1963
1964 /* Return true if we can use gather/scatter internal functions to
1965 vectorize STMT_INFO, which is a grouped or strided load or store.
1966 MASKED_P is true if load or store is conditional. When returning
1967 true, fill in GS_INFO with the information required to perform the
1968 operation. */
1969
1970 static bool
1971 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1972 loop_vec_info loop_vinfo, bool masked_p,
1973 gather_scatter_info *gs_info)
1974 {
1975 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1976 || gs_info->ifn == IFN_LAST)
1977 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1978 masked_p, gs_info);
1979
1980 tree old_offset_type = TREE_TYPE (gs_info->offset);
1981 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1982
1983 gcc_assert (TYPE_PRECISION (new_offset_type)
1984 >= TYPE_PRECISION (old_offset_type));
1985 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1986
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location,
1989 "using gather/scatter for strided/grouped access,"
1990 " scale = %d\n", gs_info->scale);
1991
1992 return true;
1993 }
1994
1995 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1996 elements with a known constant step. Return -1 if that step
1997 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1998
1999 static int
2000 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2001 {
2002 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2003 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2004 size_zero_node);
2005 }
2006
2007 /* If the target supports a permute mask that reverses the elements in
2008 a vector of type VECTYPE, return that mask, otherwise return null. */
2009
2010 static tree
2011 perm_mask_for_reverse (tree vectype)
2012 {
2013 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2014
2015 /* The encoding has a single stepped pattern. */
2016 vec_perm_builder sel (nunits, 1, 3);
2017 for (int i = 0; i < 3; ++i)
2018 sel.quick_push (nunits - 1 - i);
2019
2020 vec_perm_indices indices (sel, 1, nunits);
2021 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2022 indices))
2023 return NULL_TREE;
2024 return vect_gen_perm_mask_checked (vectype, indices);
2025 }
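
/* For a 4-element vector this requests the permutation { 3, 2, 1, 0 }.
   Only the three leading elements { nunits - 1, nunits - 2, nunits - 3 }
   are pushed; vec_perm_indices extends the single stepped pattern, which
   also covers variable-length vectors.  */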
2026
2027 /* A subroutine of get_load_store_type, with a subset of the same
2028 arguments. Handle the case where STMT_INFO is a load or store that
2029 accesses consecutive elements with a negative step. Sets *POFFSET
2030 to the offset to be applied to the DR for the first access. */
2031
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info *vinfo,
2034 stmt_vec_info stmt_info, tree vectype,
2035 vec_load_store_type vls_type,
2036 unsigned int ncopies, poly_int64 *poffset)
2037 {
2038 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2039 dr_alignment_support alignment_support_scheme;
2040
2041 if (ncopies > 1)
2042 {
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE;
2047 }
2048
2049 /* For backward running DRs the first access in vectype actually is
2050 N-1 elements before the address of the DR. */
2051 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2053
2054 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2057 if (alignment_support_scheme != dr_aligned
2058 && alignment_support_scheme != dr_unaligned_supported)
2059 {
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2062 "negative step but alignment required.\n");
2063 *poffset = 0;
2064 return VMAT_ELEMENTWISE;
2065 }
2066
2067 if (vls_type == VLS_STORE_INVARIANT)
2068 {
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE, vect_location,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN;
2074 }
2075
2076 if (!perm_mask_for_reverse (vectype))
2077 {
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2080 "negative step and reversing not supported.\n");
2081 *poffset = 0;
2082 return VMAT_ELEMENTWISE;
2083 }
2084
2085 return VMAT_CONTIGUOUS_REVERSE;
2086 }
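
/* Illustration: with 4-byte elements and a 4-element vector, a load from

       for (int i = n - 1; i >= 0; i--)
         sum += a[i];

   has step -4, so *POFFSET is (-4 + 1) * 4 == -12 bytes: the vector that
   covers iterations i, i-1, i-2 and i-3 starts three elements below the
   scalar access, and VMAT_CONTIGUOUS_REVERSE then applies the permutation
   from perm_mask_for_reverse to restore the original element order.  */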
2087
2088 /* STMT_INFO is either a masked or unconditional store. Return the value
2089 being stored. */
2090
2091 tree
2092 vect_get_store_rhs (stmt_vec_info stmt_info)
2093 {
2094 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2095 {
2096 gcc_assert (gimple_assign_single_p (assign));
2097 return gimple_assign_rhs1 (assign);
2098 }
2099 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2100 {
2101 internal_fn ifn = gimple_call_internal_fn (call);
2102 int index = internal_fn_stored_value_index (ifn);
2103 gcc_assert (index >= 0);
2104 return gimple_call_arg (call, index);
2105 }
2106 gcc_unreachable ();
2107 }
2108
2109 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2110
2111    This function returns a vector type which can be composed from NELTS pieces,
2112    whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2113    same vector size as the return vector. It first checks whether the target
2114    supports a vector mode of the piece size for the construction; if not, it
2115    then checks whether a scalar mode of the piece size can be used instead.
2116    It returns NULL_TREE if no suitable composition is available.
2117
2118 For example, for (vtype=V16QI, nelts=4), we can probably get:
2119 - V16QI with PTYPE V4QI.
2120 - V4SI with PTYPE SI.
2121 - NULL_TREE. */
2122
2123 static tree
2124 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2125 {
2126 gcc_assert (VECTOR_TYPE_P (vtype));
2127 gcc_assert (known_gt (nelts, 0U));
2128
2129 machine_mode vmode = TYPE_MODE (vtype);
2130 if (!VECTOR_MODE_P (vmode))
2131 return NULL_TREE;
2132
2133 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2134 unsigned int pbsize;
2135 if (constant_multiple_p (vbsize, nelts, &pbsize))
2136 {
2137 /* First check if vec_init optab supports construction from
2138 vector pieces directly. */
2139 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2140 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2141 machine_mode rmode;
2142 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2143 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2144 != CODE_FOR_nothing))
2145 {
2146 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2147 return vtype;
2148 }
2149
2150       /* Otherwise check whether an integer type of the same piece size exists
2151          and whether the vec_init optab supports construction from it directly. */
2152 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2153 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2155 != CODE_FOR_nothing))
2156 {
2157 *ptype = build_nonstandard_integer_type (pbsize, 1);
2158 return build_vector_type (*ptype, nelts);
2159 }
2160 }
2161
2162 return NULL_TREE;
2163 }
2164
2165 /* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT_INFO is part of a grouped load
2167 or store.
2168
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
2173
2174 static bool
2175 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2176 tree vectype, slp_tree slp_node,
2177 bool masked_p, vec_load_store_type vls_type,
2178 vect_memory_access_type *memory_access_type,
2179 poly_int64 *poffset,
2180 dr_alignment_support *alignment_support_scheme,
2181 int *misalignment,
2182 gather_scatter_info *gs_info)
2183 {
2184 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2185 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2187 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2188 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2189 bool single_element_p = (stmt_info == first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2191 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2192 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2193
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p = false;
2197
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p = (!masked_p
2201 && vls_type == VLS_LOAD
2202 && loop_vinfo
2203 && !loop->inner);
2204
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2208
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2211
2212 if (slp_node)
2213 {
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2217 first_dr_info
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2220 {
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits, group_size))
2225 *memory_access_type = VMAT_STRIDED_SLP;
2226 else
2227 *memory_access_type = VMAT_ELEMENTWISE;
2228 }
2229 else
2230 {
2231 overrun_p = loop_vinfo && gap != 0;
2232 if (overrun_p && vls_type != VLS_LOAD)
2233 {
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2237 return false;
2238 }
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2243 if (overrun_p
2244 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2245 vectype)
2246 / vect_get_scalar_dr_size (first_dr_info)))
2247 overrun_p = false;
2248
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations avoid the epilogue peeling
2251 by simply loading half of the vector only. Usually
2252 the construction with an upper zero half will be elided. */
2253 dr_alignment_support alss;
2254 int misalign = dr_misalignment (first_dr_info, vectype);
2255 tree half_vtype;
2256 if (overrun_p
2257 && !masked_p
2258 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2259 vectype, misalign)))
2260 == dr_aligned
2261 || alss == dr_unaligned_supported)
2262 && known_eq (nunits, (group_size - gap) * 2)
2263 && known_eq (nunits, group_size)
2264 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2265 != NULL_TREE))
2266 overrun_p = false;
2267
2268 if (overrun_p && !can_overrun_p)
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "Peeling for outer loop is not supported\n");
2273 return false;
2274 }
2275 int cmp = compare_step_with_zero (vinfo, stmt_info);
2276 if (cmp < 0)
2277 {
2278 if (single_element_p)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type = get_negative_load_store_type
2282 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2283 else
2284 {
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits, group_size))
2289 *memory_access_type = VMAT_STRIDED_SLP;
2290 else
2291 *memory_access_type = VMAT_ELEMENTWISE;
2292 }
2293 }
2294 else
2295 {
2296 gcc_assert (!loop_vinfo || cmp > 0);
2297 *memory_access_type = VMAT_CONTIGUOUS;
2298 }
2299
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2305 if (loop_vinfo
2306 && *memory_access_type == VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2308 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2309 nunits))
2310 {
2311 unsigned HOST_WIDE_INT cnunits, cvf;
2312 if (!can_overrun_p
2313 || !nunits.is_constant (&cnunits)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size * cvf) % cnunits + group_size < cnunits)
2321 {
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "peeling for gaps insufficient for "
2325 "access\n");
2326 return false;
2327 }
2328 overrun_p = true;
2329 }
2330 }
2331 }
2332 else
2333 {
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type = VMAT_ELEMENTWISE;
2337
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p = (gap != 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2345 if (would_overrun_p
2346 && !masked_p
2347 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2348 / vect_get_scalar_dr_size (first_dr_info)))
2349 would_overrun_p = false;
2350
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2352 && (can_overrun_p || !would_overrun_p)
2353 && compare_step_with_zero (vinfo, stmt_info) > 0)
2354 {
2355 /* First cope with the degenerate case of a single-element
2356 vector. */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2358 ;
2359
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type == VLS_LOAD
2362 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2363 : vect_store_lanes_supported (vectype, group_size,
2364 masked_p))
2365 {
2366 *memory_access_type = VMAT_LOAD_STORE_LANES;
2367 overrun_p = would_overrun_p;
2368 }
2369
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type == VLS_LOAD
2372 ? vect_grouped_load_supported (vectype, single_element_p,
2373 group_size)
2374 : vect_grouped_store_supported (vectype, group_size))
2375 {
2376 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2377 overrun_p = would_overrun_p;
2378 }
2379 }
2380
2381   /* As a last resort, try using a gather load or scatter store.
2382
2383 ??? Although the code can handle all group sizes correctly,
2384 it probably isn't a win to use separate strided accesses based
2385 on nearby locations. Or, even if it's a win over scalar code,
2386 it might not be a win over vectorizing at a lower VF, if that
2387 allows us to use contiguous accesses. */
2388 if (*memory_access_type == VMAT_ELEMENTWISE
2389 && single_element_p
2390 && loop_vinfo
2391 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2392 masked_p, gs_info))
2393 *memory_access_type = VMAT_GATHER_SCATTER;
2394 }
2395
2396 if (*memory_access_type == VMAT_GATHER_SCATTER
2397 || *memory_access_type == VMAT_ELEMENTWISE)
2398 {
2399 *alignment_support_scheme = dr_unaligned_supported;
2400 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2401 }
2402 else
2403 {
2404 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2407 *misalignment);
2408 }
2409
2410 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2411 {
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2415 while (next_stmt_info)
2416 {
2417 tree op = vect_get_store_rhs (next_stmt_info);
2418 enum vect_def_type dt;
2419 if (!vect_is_simple_use (op, vinfo, &dt))
2420 {
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "use not simple.\n");
2424 return false;
2425 }
2426 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2427 }
2428 }
2429
2430 if (overrun_p)
2431 {
2432 gcc_assert (can_overrun_p);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "Data access with gaps requires scalar "
2436 "epilogue loop\n");
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2438 }
2439
2440 return true;
2441 }
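
/* A small example of the overrun handling above (illustrative):

       for (int i = 0; i < n; i++)
         sum += a[3 * i] + a[3 * i + 1];

   forms a load group of size 3 with a gap of 1 at the end. Loading the
   group with contiguous vector loads also reads the unused a[3 * i + 2]
   and can therefore touch memory beyond what the scalar loop accesses in
   its final iterations; peeling for gaps keeps at least one scalar
   iteration after the vector loop so that this cannot happen.  */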
2442
2443 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2444 if there is a memory access type that the vectorized form can use,
2445 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2446 or scatters, fill in GS_INFO accordingly. In addition
2447 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2448 the target does not support the alignment scheme. *MISALIGNMENT
2449 is set according to the alignment of the access (including
2450 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2451
2452 SLP says whether we're performing SLP rather than loop vectorization.
2453 MASKED_P is true if the statement is conditional on a vectorized mask.
2454 VECTYPE is the vector type that the vectorized statements will use.
2455 NCOPIES is the number of vector statements that will be needed. */
2456
2457 static bool
2458 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2459 tree vectype, slp_tree slp_node,
2460 bool masked_p, vec_load_store_type vls_type,
2461 unsigned int ncopies,
2462 vect_memory_access_type *memory_access_type,
2463 poly_int64 *poffset,
2464 dr_alignment_support *alignment_support_scheme,
2465 int *misalignment,
2466 gather_scatter_info *gs_info)
2467 {
2468 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2469 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2470 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2471 *poffset = 0;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2473 {
2474 *memory_access_type = VMAT_GATHER_SCATTER;
2475 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2476 gcc_unreachable ();
2477 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2478 &gs_info->offset_dt,
2479 &gs_info->offset_vectype))
2480 {
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "%s index use not simple.\n",
2484 vls_type == VLS_LOAD ? "gather" : "scatter");
2485 return false;
2486 }
2487 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2488 {
2489 if (vls_type != VLS_LOAD)
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "unsupported emulated scatter.\n");
2494 return false;
2495 }
2496 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2497 || !TYPE_VECTOR_SUBPARTS
2498 (gs_info->offset_vectype).is_constant ()
2499 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2500 (gs_info->offset_vectype),
2501 TYPE_VECTOR_SUBPARTS (vectype)))
2502 {
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "unsupported vector types for emulated "
2506 "gather.\n");
2507 return false;
2508 }
2509 }
2510 /* Gather-scatter accesses perform only component accesses, alignment
2511 is irrelevant for them. */
2512 *alignment_support_scheme = dr_unaligned_supported;
2513 }
2514 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2515 {
2516 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2517 masked_p,
2518 vls_type, memory_access_type, poffset,
2519 alignment_support_scheme,
2520 misalignment, gs_info))
2521 return false;
2522 }
2523 else if (STMT_VINFO_STRIDED_P (stmt_info))
2524 {
2525 gcc_assert (!slp_node);
2526 if (loop_vinfo
2527 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2528 masked_p, gs_info))
2529 *memory_access_type = VMAT_GATHER_SCATTER;
2530 else
2531 *memory_access_type = VMAT_ELEMENTWISE;
2532 /* Alignment is irrelevant here. */
2533 *alignment_support_scheme = dr_unaligned_supported;
2534 }
2535 else
2536 {
2537 int cmp = compare_step_with_zero (vinfo, stmt_info);
2538 if (cmp == 0)
2539 {
2540 gcc_assert (vls_type == VLS_LOAD);
2541 *memory_access_type = VMAT_INVARIANT;
2542 /* Invariant accesses perform only component accesses, alignment
2543 is irrelevant for them. */
2544 *alignment_support_scheme = dr_unaligned_supported;
2545 }
2546 else
2547 {
2548 if (cmp < 0)
2549 *memory_access_type = get_negative_load_store_type
2550 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2551 else
2552 *memory_access_type = VMAT_CONTIGUOUS;
2553 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2554 vectype, *poffset);
2555 *alignment_support_scheme
2556 = vect_supportable_dr_alignment (vinfo,
2557 STMT_VINFO_DR_INFO (stmt_info),
2558 vectype, *misalignment);
2559 }
2560 }
2561
2562 if ((*memory_access_type == VMAT_ELEMENTWISE
2563 || *memory_access_type == VMAT_STRIDED_SLP)
2564 && !nunits.is_constant ())
2565 {
2566 if (dump_enabled_p ())
2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2568 "Not using elementwise accesses due to variable "
2569 "vectorization factor.\n");
2570 return false;
2571 }
2572
2573 if (*alignment_support_scheme == dr_unaligned_unsupported)
2574 {
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "unsupported unaligned access\n");
2578 return false;
2579 }
2580
2581 /* FIXME: At the moment the cost model seems to underestimate the
2582 cost of using elementwise accesses. This check preserves the
2583 traditional behavior until that can be fixed. */
2584 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2585 if (!first_stmt_info)
2586 first_stmt_info = stmt_info;
2587 if (*memory_access_type == VMAT_ELEMENTWISE
2588 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2589 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2590 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2591 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2592 {
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 "not falling back to elementwise accesses\n");
2596 return false;
2597 }
2598 return true;
2599 }
2600
2601 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2602 conditional operation STMT_INFO. When returning true, store the mask
2603 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2604 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2605 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2606
2607 static bool
2608 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2609 slp_tree slp_node, unsigned mask_index,
2610 tree *mask, slp_tree *mask_node,
2611 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2612 {
2613 enum vect_def_type mask_dt;
2614 tree mask_vectype;
2615 slp_tree mask_node_1;
2616 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2617 mask, &mask_node_1, &mask_dt, &mask_vectype))
2618 {
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621 "mask use not simple.\n");
2622 return false;
2623 }
2624
2625 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2626 {
2627 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2629 "mask argument is not a boolean.\n");
2630 return false;
2631 }
2632
2633   /* If the caller is not prepared to adjust an external/constant
2634      SLP mask vector type, fail. */
2635 if (slp_node
2636 && !mask_node
2637 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2638 {
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "SLP mask argument is not vectorized.\n");
2642 return false;
2643 }
2644
2645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2646 if (!mask_vectype)
2647 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2648
2649 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2650 {
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2653 "could not find an appropriate vector mask type.\n");
2654 return false;
2655 }
2656
2657 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2658 TYPE_VECTOR_SUBPARTS (vectype)))
2659 {
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2662 "vector mask type %T"
2663 " does not match vector data type %T.\n",
2664 mask_vectype, vectype);
2665
2666 return false;
2667 }
2668
2669 *mask_dt_out = mask_dt;
2670 *mask_vectype_out = mask_vectype;
2671 if (mask_node)
2672 *mask_node = mask_node_1;
2673 return true;
2674 }
2675
2676 /* Return true if stored value RHS is suitable for vectorizing store
2677 statement STMT_INFO. When returning true, store the type of the
2678 definition in *RHS_DT_OUT, the type of the vectorized store value in
2679 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2680
2681 static bool
2682 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2683 slp_tree slp_node, tree rhs,
2684 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2685 vec_load_store_type *vls_type_out)
2686 {
2687   /* In the case this is a store from a constant, make sure
2688 native_encode_expr can handle it. */
2689 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2690 {
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "cannot encode constant as a byte sequence.\n");
2694 return false;
2695 }
2696
2697 unsigned op_no = 0;
2698 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2699 {
2700 if (gimple_call_internal_p (call)
2701 && internal_store_fn_p (gimple_call_internal_fn (call)))
2702 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2703 }
2704
2705 enum vect_def_type rhs_dt;
2706 tree rhs_vectype;
2707 slp_tree slp_op;
2708 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2709 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2710 {
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2713 "use not simple.\n");
2714 return false;
2715 }
2716
2717 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2718 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2719 {
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2722 "incompatible vector types.\n");
2723 return false;
2724 }
2725
2726 *rhs_dt_out = rhs_dt;
2727 *rhs_vectype_out = rhs_vectype;
2728 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2729 *vls_type_out = VLS_STORE_INVARIANT;
2730 else
2731 *vls_type_out = VLS_STORE;
2732 return true;
2733 }
2734
2735 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2736 Note that we support masks with floating-point type, in which case the
2737 floats are interpreted as a bitmask. */
2738
2739 static tree
2740 vect_build_all_ones_mask (vec_info *vinfo,
2741 stmt_vec_info stmt_info, tree masktype)
2742 {
2743 if (TREE_CODE (masktype) == INTEGER_TYPE)
2744 return build_int_cst (masktype, -1);
2745 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2746 {
2747 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2748 mask = build_vector_from_val (masktype, mask);
2749 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2750 }
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2752 {
2753 REAL_VALUE_TYPE r;
2754 long tmp[6];
2755 for (int j = 0; j < 6; ++j)
2756 tmp[j] = -1;
2757 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2758 tree mask = build_real (TREE_TYPE (masktype), r);
2759 mask = build_vector_from_val (masktype, mask);
2760 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2761 }
2762 gcc_unreachable ();
2763 }
2764
2765 /* Build an all-zero merge value of type VECTYPE while vectorizing
2766 STMT_INFO as a gather load. */
2767
2768 static tree
2769 vect_build_zero_merge_argument (vec_info *vinfo,
2770 stmt_vec_info stmt_info, tree vectype)
2771 {
2772 tree merge;
2773 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2774 merge = build_int_cst (TREE_TYPE (vectype), 0);
2775 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2776 {
2777 REAL_VALUE_TYPE r;
2778 long tmp[6];
2779 for (int j = 0; j < 6; ++j)
2780 tmp[j] = 0;
2781 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2782 merge = build_real (TREE_TYPE (vectype), r);
2783 }
2784 else
2785 gcc_unreachable ();
2786 merge = build_vector_from_val (vectype, merge);
2787 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2788 }
2789
2790 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2791 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2792 the gather load operation. If the load is conditional, MASK is the
2793 unvectorized condition and MASK_DT is its definition type, otherwise
2794 MASK is null. */
2795
2796 static void
2797 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2798 gimple_stmt_iterator *gsi,
2799 gimple **vec_stmt,
2800 gather_scatter_info *gs_info,
2801 tree mask)
2802 {
2803 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2804 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2805 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2807 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2808 edge pe = loop_preheader_edge (loop);
2809 enum { NARROW, NONE, WIDEN } modifier;
2810 poly_uint64 gather_off_nunits
2811 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2812
2813 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2814 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2815 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2817 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2818 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2819 tree scaletype = TREE_VALUE (arglist);
2820 tree real_masktype = masktype;
2821 gcc_checking_assert (types_compatible_p (srctype, rettype)
2822 && (!mask
2823 || TREE_CODE (masktype) == INTEGER_TYPE
2824 || types_compatible_p (srctype, masktype)));
2825 if (mask)
2826 masktype = truth_type_for (srctype);
2827
2828 tree mask_halftype = masktype;
2829 tree perm_mask = NULL_TREE;
2830 tree mask_perm_mask = NULL_TREE;
2831 if (known_eq (nunits, gather_off_nunits))
2832 modifier = NONE;
2833 else if (known_eq (nunits * 2, gather_off_nunits))
2834 {
2835 modifier = WIDEN;
2836
2837 /* Currently widening gathers and scatters are only supported for
2838 fixed-length vectors. */
2839 int count = gather_off_nunits.to_constant ();
2840 vec_perm_builder sel (count, count, 1);
2841 for (int i = 0; i < count; ++i)
2842 sel.quick_push (i | (count / 2));
2843
2844 vec_perm_indices indices (sel, 1, count);
2845 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2846 indices);
2847 }
2848 else if (known_eq (nunits, gather_off_nunits * 2))
2849 {
2850 modifier = NARROW;
2851
2852 /* Currently narrowing gathers and scatters are only supported for
2853 fixed-length vectors. */
2854 int count = nunits.to_constant ();
2855 vec_perm_builder sel (count, count, 1);
2856 sel.quick_grow (count);
2857 for (int i = 0; i < count; ++i)
2858 sel[i] = i < count / 2 ? i : i + count / 2;
2859 vec_perm_indices indices (sel, 2, count);
2860 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2861
2862 ncopies *= 2;
2863
2864 if (mask && VECTOR_TYPE_P (real_masktype))
2865 {
2866 for (int i = 0; i < count; ++i)
2867 sel[i] = i | (count / 2);
2868 indices.new_vector (sel, 2, count);
2869 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2870 }
2871 else if (mask)
2872 mask_halftype = truth_type_for (gs_info->offset_vectype);
2873 }
2874 else
2875 gcc_unreachable ();
2876
2877 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2878 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2879
2880 tree ptr = fold_convert (ptrtype, gs_info->base);
2881 if (!is_gimple_min_invariant (ptr))
2882 {
2883 gimple_seq seq;
2884 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2885 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2886 gcc_assert (!new_bb);
2887 }
2888
2889 tree scale = build_int_cst (scaletype, gs_info->scale);
2890
2891 tree vec_oprnd0 = NULL_TREE;
2892 tree vec_mask = NULL_TREE;
2893 tree src_op = NULL_TREE;
2894 tree mask_op = NULL_TREE;
2895 tree prev_res = NULL_TREE;
2896
2897 if (!mask)
2898 {
2899 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2900 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2901 }
2902
2903 auto_vec<tree> vec_oprnds0;
2904 auto_vec<tree> vec_masks;
2905 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2906 modifier == WIDEN ? ncopies / 2 : ncopies,
2907 gs_info->offset, &vec_oprnds0);
2908 if (mask)
2909 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2910 modifier == NARROW ? ncopies / 2 : ncopies,
2911 mask, &vec_masks, masktype);
2912 for (int j = 0; j < ncopies; ++j)
2913 {
2914 tree op, var;
2915 if (modifier == WIDEN && (j & 1))
2916 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2917 perm_mask, stmt_info, gsi);
2918 else
2919 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2920
2921 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2922 {
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2924 TYPE_VECTOR_SUBPARTS (idxtype)));
2925 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2926 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2927 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2928 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2929 op = var;
2930 }
2931
2932 if (mask)
2933 {
2934 if (mask_perm_mask && (j & 1))
2935 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2936 mask_perm_mask, stmt_info, gsi);
2937 else
2938 {
2939 if (modifier == NARROW)
2940 {
2941 if ((j & 1) == 0)
2942 vec_mask = vec_masks[j / 2];
2943 }
2944 else
2945 vec_mask = vec_masks[j];
2946
2947 mask_op = vec_mask;
2948 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2949 {
2950 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2951 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2952 gcc_assert (known_eq (sub1, sub2));
2953 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2954 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2955 gassign *new_stmt
2956 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2957 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2958 mask_op = var;
2959 }
2960 }
2961 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2962 {
2963 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2964 gassign *new_stmt
2965 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2966 : VEC_UNPACK_LO_EXPR,
2967 mask_op);
2968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2969 mask_op = var;
2970 }
2971 src_op = mask_op;
2972 }
2973
2974 tree mask_arg = mask_op;
2975 if (masktype != real_masktype)
2976 {
2977 tree utype, optype = TREE_TYPE (mask_op);
2978 if (VECTOR_TYPE_P (real_masktype)
2979 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2980 utype = real_masktype;
2981 else
2982 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2983 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2984 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2985 gassign *new_stmt
2986 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2988 mask_arg = var;
2989 if (!useless_type_conversion_p (real_masktype, utype))
2990 {
2991 gcc_assert (TYPE_PRECISION (utype)
2992 <= TYPE_PRECISION (real_masktype));
2993 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2994 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2995 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2996 mask_arg = var;
2997 }
2998 src_op = build_zero_cst (srctype);
2999 }
3000 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3001 mask_arg, scale);
3002
3003 if (!useless_type_conversion_p (vectype, rettype))
3004 {
3005 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3006 TYPE_VECTOR_SUBPARTS (rettype)));
3007 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3008 gimple_call_set_lhs (new_stmt, op);
3009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3010 var = make_ssa_name (vec_dest);
3011 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3012 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3013 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3014 }
3015 else
3016 {
3017 var = make_ssa_name (vec_dest, new_stmt);
3018 gimple_call_set_lhs (new_stmt, var);
3019 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3020 }
3021
3022 if (modifier == NARROW)
3023 {
3024 if ((j & 1) == 0)
3025 {
3026 prev_res = var;
3027 continue;
3028 }
3029 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3030 stmt_info, gsi);
3031 new_stmt = SSA_NAME_DEF_STMT (var);
3032 }
3033
3034 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3035 }
3036 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3037 }
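
/* Shape of the generated code (illustrative sketch for the builtin-decl
   gather path used here; GATHER_BUILTIN and the operand names below are
   placeholders): a scalar loop

       for (int i = 0; i < n; i++)
         res[i] = base[idx[i]];

   becomes, per vector copy, roughly

       vect_idx = <vectorized idx values>;
       vect_res = GATHER_BUILTIN (merge_or_zero, &base[0], vect_idx,
                                  mask_or_all_ones, scale);

   with VIEW_CONVERT_EXPRs added whenever the builtin's argument or return
   types differ from the vector types chosen by the vectorizer, and an
   extra permutation when the offset vector has twice (WIDEN) or half
   (NARROW) the number of elements of the data vector.  */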
3038
3039 /* Prepare the base and offset in GS_INFO for vectorization.
3040 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3041 to the vectorized offset argument for the first copy of STMT_INFO.
3042 STMT_INFO is the statement described by GS_INFO and LOOP is the
3043 containing loop. */
3044
3045 static void
3046 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3047 class loop *loop, stmt_vec_info stmt_info,
3048 slp_tree slp_node, gather_scatter_info *gs_info,
3049 tree *dataref_ptr, vec<tree> *vec_offset)
3050 {
3051 gimple_seq stmts = NULL;
3052 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3053 if (stmts != NULL)
3054 {
3055 basic_block new_bb;
3056 edge pe = loop_preheader_edge (loop);
3057 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3058 gcc_assert (!new_bb);
3059 }
3060 if (slp_node)
3061 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3062 else
3063 {
3064 unsigned ncopies
3065 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3066 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3067 gs_info->offset, vec_offset,
3068 gs_info->offset_vectype);
3069 }
3070 }
3071
3072 /* Prepare to implement a grouped or strided load or store using
3073 the gather load or scatter store operation described by GS_INFO.
3074 STMT_INFO is the load or store statement.
3075
3076 Set *DATAREF_BUMP to the amount that should be added to the base
3077 address after each copy of the vectorized statement. Set *VEC_OFFSET
3078 to an invariant offset vector in which element I has the value
3079 I * DR_STEP / SCALE. */
3080
3081 static void
3082 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3083 loop_vec_info loop_vinfo,
3084 gather_scatter_info *gs_info,
3085 tree *dataref_bump, tree *vec_offset)
3086 {
3087 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3089
3090 tree bump = size_binop (MULT_EXPR,
3091 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3092 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3093 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3094
3095 /* The offset given in GS_INFO can have pointer type, so use the element
3096 type of the vector instead. */
3097 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3098
3099 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3100 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3101 ssize_int (gs_info->scale));
3102 step = fold_convert (offset_type, step);
3103
3104 /* Create {0, X, X*2, X*3, ...}. */
3105 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3106 build_zero_cst (offset_type), step);
3107 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3108 }
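
/* Worked example (illustrative): with a 4-element vector, DR_STEP == 12
   bytes and SCALE == 4, X is 12 / 4 == 3, so *VEC_OFFSET becomes
   { 0, 3, 6, 9 } and *DATAREF_BUMP is 12 * 4 == 48, the number of bytes
   between the first elements of consecutive vector iterations.  */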
3109
3110 /* Return the amount that should be added to a vector pointer to move
3111 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3112 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3113 vectorization. */
3114
3115 static tree
3116 vect_get_data_ptr_increment (vec_info *vinfo,
3117 dr_vec_info *dr_info, tree aggr_type,
3118 vect_memory_access_type memory_access_type)
3119 {
3120 if (memory_access_type == VMAT_INVARIANT)
3121 return size_zero_node;
3122
3123 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3124 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3125 if (tree_int_cst_sgn (step) == -1)
3126 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3127 return iv_step;
3128 }
3129
3130 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3131
3132 static bool
3133 vectorizable_bswap (vec_info *vinfo,
3134 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3135 gimple **vec_stmt, slp_tree slp_node,
3136 slp_tree *slp_op,
3137 tree vectype_in, stmt_vector_for_cost *cost_vec)
3138 {
3139 tree op, vectype;
3140 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3141 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3142 unsigned ncopies;
3143
3144 op = gimple_call_arg (stmt, 0);
3145 vectype = STMT_VINFO_VECTYPE (stmt_info);
3146 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3147
3148 /* Multiple types in SLP are handled by creating the appropriate number of
3149 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3150 case of SLP. */
3151 if (slp_node)
3152 ncopies = 1;
3153 else
3154 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3155
3156 gcc_assert (ncopies >= 1);
3157
3158 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3159 if (! char_vectype)
3160 return false;
3161
3162 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3163 unsigned word_bytes;
3164 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3165 return false;
3166
3167 /* The encoding uses one stepped pattern for each byte in the word. */
3168 vec_perm_builder elts (num_bytes, word_bytes, 3);
3169 for (unsigned i = 0; i < 3; ++i)
3170 for (unsigned j = 0; j < word_bytes; ++j)
3171 elts.quick_push ((i + 1) * word_bytes - j - 1);
3172
3173 vec_perm_indices indices (elts, 1, num_bytes);
3174 machine_mode vmode = TYPE_MODE (char_vectype);
3175 if (!can_vec_perm_const_p (vmode, vmode, indices))
3176 return false;
3177
3178 if (! vec_stmt)
3179 {
3180 if (slp_node
3181 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3182 {
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "incompatible vector types for invariants\n");
3186 return false;
3187 }
3188
3189 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3190 DUMP_VECT_SCOPE ("vectorizable_bswap");
3191 record_stmt_cost (cost_vec,
3192 1, vector_stmt, stmt_info, 0, vect_prologue);
3193 record_stmt_cost (cost_vec,
3194 slp_node
3195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3196 vec_perm, stmt_info, 0, vect_body);
3197 return true;
3198 }
3199
3200 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3201
3202 /* Transform. */
3203 vec<tree> vec_oprnds = vNULL;
3204 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3205 op, &vec_oprnds);
3206   /* Arguments are ready. Create the new vector stmt. */
3207 unsigned i;
3208 tree vop;
3209 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3210 {
3211 gimple *new_stmt;
3212 tree tem = make_ssa_name (char_vectype);
3213 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3214 char_vectype, vop));
3215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216 tree tem2 = make_ssa_name (char_vectype);
3217 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3218 tem, tem, bswap_vconst);
3219 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220 tem = make_ssa_name (vectype);
3221 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3222 vectype, tem2));
3223 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3224 if (slp_node)
3225 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3226 else
3227 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3228 }
3229
3230 if (!slp_node)
3231 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3232
3233 vec_oprnds.release ();
3234 return true;
3235 }
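
/* For instance (illustrative), __builtin_bswap32 over a V4SI vector is
   implemented by viewing the operand as V16QI and applying the byte
   permutation

       { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }

   i.e. one stepped pattern per byte of the 4-byte word, exactly the
   encoding built above, followed by a VIEW_CONVERT_EXPR back to V4SI.  */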
3236
3237 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3238 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3239 in a single step. On success, store the binary pack code in
3240 *CONVERT_CODE. */
3241
3242 static bool
3243 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3244 tree_code *convert_code)
3245 {
3246 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3247 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3248 return false;
3249
3250 tree_code code;
3251 int multi_step_cvt = 0;
3252 auto_vec <tree, 8> interm_types;
3253 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3254 &code, &multi_step_cvt, &interm_types)
3255 || multi_step_cvt)
3256 return false;
3257
3258 *convert_code = code;
3259 return true;
3260 }
3261
3262 /* Function vectorizable_call.
3263
3264 Check if STMT_INFO performs a function call that can be vectorized.
3265 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3266 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3267 Return true if STMT_INFO is vectorizable in this way. */
3268
3269 static bool
3270 vectorizable_call (vec_info *vinfo,
3271 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3272 gimple **vec_stmt, slp_tree slp_node,
3273 stmt_vector_for_cost *cost_vec)
3274 {
3275 gcall *stmt;
3276 tree vec_dest;
3277 tree scalar_dest;
3278 tree op;
3279 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3280 tree vectype_out, vectype_in;
3281 poly_uint64 nunits_in;
3282 poly_uint64 nunits_out;
3283 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3284 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3285 tree fndecl, new_temp, rhs_type;
3286 enum vect_def_type dt[4]
3287 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3288 vect_unknown_def_type };
3289 tree vectypes[ARRAY_SIZE (dt)] = {};
3290 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3291 int ndts = ARRAY_SIZE (dt);
3292 int ncopies, j;
3293 auto_vec<tree, 8> vargs;
3294 enum { NARROW, NONE, WIDEN } modifier;
3295 size_t i, nargs;
3296 tree lhs;
3297
3298 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3299 return false;
3300
3301 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3302 && ! vec_stmt)
3303 return false;
3304
3305 /* Is STMT_INFO a vectorizable call? */
3306 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3307 if (!stmt)
3308 return false;
3309
3310 if (gimple_call_internal_p (stmt)
3311 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3312 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3313 /* Handled by vectorizable_load and vectorizable_store. */
3314 return false;
3315
3316 if (gimple_call_lhs (stmt) == NULL_TREE
3317 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3318 return false;
3319
3320 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3321
3322 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3323
3324 /* Process function arguments. */
3325 rhs_type = NULL_TREE;
3326 vectype_in = NULL_TREE;
3327 nargs = gimple_call_num_args (stmt);
3328
3329 /* Bail out if the function has more than four arguments; we do not have
3330 interesting builtin functions to vectorize with more than two arguments
3331 except for fma. Calls with no arguments are not interesting either. */
3332 if (nargs == 0 || nargs > 4)
3333 return false;
3334
3335 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3336 combined_fn cfn = gimple_call_combined_fn (stmt);
3337 if (cfn == CFN_GOMP_SIMD_LANE)
3338 {
3339 nargs = 0;
3340 rhs_type = unsigned_type_node;
3341 }
3342
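  /* If the call is an internal function that takes a mask argument,
     remember which operand it is so that the loop mask can later be
     folded into it (via prepare_vec_mask) in a fully-masked loop.  */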
3343 int mask_opno = -1;
3344 if (internal_fn_p (cfn))
3345 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3346
3347 for (i = 0; i < nargs; i++)
3348 {
3349 if ((int) i == mask_opno)
3350 {
3351 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3352 &op, &slp_op[i], &dt[i], &vectypes[i]))
3353 return false;
3354 continue;
3355 }
3356
3357 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3358 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3359 {
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3362 "use not simple.\n");
3363 return false;
3364 }
3365
3366 /* We can only handle calls with arguments of the same type. */
3367 if (rhs_type
3368 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3369 {
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3372 "argument types differ.\n");
3373 return false;
3374 }
3375 if (!rhs_type)
3376 rhs_type = TREE_TYPE (op);
3377
3378 if (!vectype_in)
3379 vectype_in = vectypes[i];
3380 else if (vectypes[i]
3381 && !types_compatible_p (vectypes[i], vectype_in))
3382 {
3383 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3385 "argument vector types differ.\n");
3386 return false;
3387 }
3388 }
3389 /* If all arguments are external or constant defs, infer the vector type
3390 from the scalar type. */
3391 if (!vectype_in)
3392 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3393 if (vec_stmt)
3394 gcc_assert (vectype_in);
3395 if (!vectype_in)
3396 {
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3399 "no vectype for scalar type %T\n", rhs_type);
3400
3401 return false;
3402 }
3403 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3404 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3405 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3406 by a pack of the two vectors into an SI vector. We would need
3407 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3408 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3409 {
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "mismatched vector sizes %T and %T\n",
3413 vectype_in, vectype_out);
3414 return false;
3415 }
3416
3417 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3418 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3419 {
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3422 "mixed mask and nonmask vector types\n");
3423 return false;
3424 }
3425
3426 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3427 {
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3430 "use emulated vector type for call\n");
3431 return false;
3432 }
3433
3434 /* FORNOW */
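  /* Classify the call from the relative lane counts of the input and
     output vector types: equal counts give a one-to-one mapping (NONE),
     twice as many output lanes means the results of two calls are packed
     into one output vector (NARROW), and twice as many input lanes is the
     widening case (WIDEN).  */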
3435 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3436 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3437 if (known_eq (nunits_in * 2, nunits_out))
3438 modifier = NARROW;
3439 else if (known_eq (nunits_out, nunits_in))
3440 modifier = NONE;
3441 else if (known_eq (nunits_out * 2, nunits_in))
3442 modifier = WIDEN;
3443 else
3444 return false;
3445
3446 /* We only handle functions that do not read or clobber memory. */
3447 if (gimple_vuse (stmt))
3448 {
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "function reads from or writes to memory.\n");
3452 return false;
3453 }
3454
3455 /* For now, we only vectorize functions if a target-specific builtin
3456 is available. TODO -- in some cases, it might be profitable to
3457 insert the calls for pieces of the vector, in order to be able
3458 to vectorize other operations in the loop. */
3459 fndecl = NULL_TREE;
3460 internal_fn ifn = IFN_LAST;
3461 tree callee = gimple_call_fndecl (stmt);
3462
3463 /* First try using an internal function. */
3464 tree_code convert_code = ERROR_MARK;
3465 if (cfn != CFN_LAST
3466 && (modifier == NONE
3467 || (modifier == NARROW
3468 && simple_integer_narrowing (vectype_out, vectype_in,
3469 &convert_code))))
3470 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3471 vectype_in);
3472
3473 /* If that fails, try asking for a target-specific built-in function. */
3474 if (ifn == IFN_LAST)
3475 {
3476 if (cfn != CFN_LAST)
3477 fndecl = targetm.vectorize.builtin_vectorized_function
3478 (cfn, vectype_out, vectype_in);
3479 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3480 fndecl = targetm.vectorize.builtin_md_vectorized_function
3481 (callee, vectype_out, vectype_in);
3482 }
3483
3484 if (ifn == IFN_LAST && !fndecl)
3485 {
3486 if (cfn == CFN_GOMP_SIMD_LANE
3487 && !slp_node
3488 && loop_vinfo
3489 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3490 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3491 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3492 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3493 {
3494 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3495 { 0, 1, 2, ... vf - 1 } vector. */
3496 gcc_assert (nargs == 0);
3497 }
3498 else if (modifier == NONE
3499 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3500 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3501 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3502 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3503 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3504 slp_op, vectype_in, cost_vec);
3505 else
3506 {
3507 if (dump_enabled_p ())
3508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3509 "function is not vectorizable.\n");
3510 return false;
3511 }
3512 }
3513
3514 if (slp_node)
3515 ncopies = 1;
3516 else if (modifier == NARROW && ifn == IFN_LAST)
3517 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3518 else
3519 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3520
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies >= 1);
3524
3525 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3526 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3527 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3528 if (!vec_stmt) /* transformation not required. */
3529 {
3530 if (slp_node)
3531 for (i = 0; i < nargs; ++i)
3532 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3533 vectypes[i]
3534 ? vectypes[i] : vectype_in))
3535 {
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3538 "incompatible vector types for invariants\n");
3539 return false;
3540 }
3541 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3542 DUMP_VECT_SCOPE ("vectorizable_call");
3543 vect_model_simple_cost (vinfo, stmt_info,
3544 ncopies, dt, ndts, slp_node, cost_vec);
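      /* When narrowing via an internal function, each pair of half-width
	 results generated below is combined with the pack code chosen by
	 simple_integer_narrowing; the vec_promote_demote entries account
	 for those extra statements.  */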
3545 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3546 record_stmt_cost (cost_vec, ncopies / 2,
3547 vec_promote_demote, stmt_info, 0, vect_body);
3548
3549 if (loop_vinfo
3550 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3551 && (reduc_idx >= 0 || mask_opno >= 0))
3552 {
3553 if (reduc_idx >= 0
3554 && (cond_fn == IFN_LAST
3555 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3556 OPTIMIZE_FOR_SPEED)))
3557 {
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 "can't use a fully-masked loop because no"
3561 " conditional operation is available.\n");
3562 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3563 }
3564 else
3565 {
3566 unsigned int nvectors
3567 = (slp_node
3568 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3569 : ncopies);
3570 tree scalar_mask = NULL_TREE;
3571 if (mask_opno >= 0)
3572 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3573 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3574 vectype_out, scalar_mask);
3575 }
3576 }
3577 return true;
3578 }
3579
3580 /* Transform. */
3581
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3584
3585 /* Handle def. */
3586 scalar_dest = gimple_call_lhs (stmt);
3587 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3588
3589 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3590 unsigned int vect_nargs = nargs;
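  /* In a fully-masked loop, a call that feeds a reduction is emitted as
     its conditional variant COND_FN, which takes the loop mask as an
     extra leading argument and repeats the vectorized reduction operand
     as a trailing "else" value; hence the two additional arguments.  */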
3591 if (masked_loop_p && reduc_idx >= 0)
3592 {
3593 ifn = cond_fn;
3594 vect_nargs += 2;
3595 }
3596
3597 if (modifier == NONE || ifn != IFN_LAST)
3598 {
3599 tree prev_res = NULL_TREE;
3600 vargs.safe_grow (vect_nargs, true);
3601 auto_vec<vec<tree> > vec_defs (nargs);
3602 for (j = 0; j < ncopies; ++j)
3603 {
3604 /* Build argument list for the vectorized call. */
3605 if (slp_node)
3606 {
3607 vec<tree> vec_oprnds0;
3608
3609 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3610 vec_oprnds0 = vec_defs[0];
3611
3612 /* Arguments are ready. Create the new vector stmt. */
3613 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3614 {
3615 int varg = 0;
3616 if (masked_loop_p && reduc_idx >= 0)
3617 {
3618 unsigned int vec_num = vec_oprnds0.length ();
3619 /* Always true for SLP. */
3620 gcc_assert (ncopies == 1);
3621 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3622 vectype_out, i);
3623 }
3624 size_t k;
3625 for (k = 0; k < nargs; k++)
3626 {
3627 vec<tree> vec_oprndsk = vec_defs[k];
3628 vargs[varg++] = vec_oprndsk[i];
3629 }
3630 if (masked_loop_p && reduc_idx >= 0)
3631 vargs[varg++] = vargs[reduc_idx + 1];
3632 gimple *new_stmt;
3633 if (modifier == NARROW)
3634 {
3635 /* We don't define any narrowing conditional functions
3636 at present. */
3637 gcc_assert (mask_opno < 0);
3638 tree half_res = make_ssa_name (vectype_in);
3639 gcall *call
3640 = gimple_build_call_internal_vec (ifn, vargs);
3641 gimple_call_set_lhs (call, half_res);
3642 gimple_call_set_nothrow (call, true);
3643 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3644 if ((i & 1) == 0)
3645 {
3646 prev_res = half_res;
3647 continue;
3648 }
3649 new_temp = make_ssa_name (vec_dest);
3650 new_stmt = gimple_build_assign (new_temp, convert_code,
3651 prev_res, half_res);
3652 vect_finish_stmt_generation (vinfo, stmt_info,
3653 new_stmt, gsi);
3654 }
3655 else
3656 {
3657 if (mask_opno >= 0 && masked_loop_p)
3658 {
3659 unsigned int vec_num = vec_oprnds0.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies == 1);
3662 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3663 vectype_out, i);
3664 vargs[mask_opno] = prepare_vec_mask
3665 (loop_vinfo, TREE_TYPE (mask), mask,
3666 vargs[mask_opno], gsi);
3667 }
3668
3669 gcall *call;
3670 if (ifn != IFN_LAST)
3671 call = gimple_build_call_internal_vec (ifn, vargs);
3672 else
3673 call = gimple_build_call_vec (fndecl, vargs);
3674 new_temp = make_ssa_name (vec_dest, call);
3675 gimple_call_set_lhs (call, new_temp);
3676 gimple_call_set_nothrow (call, true);
3677 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3678 new_stmt = call;
3679 }
3680 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3681 }
3682 continue;
3683 }
3684
3685 int varg = 0;
3686 if (masked_loop_p && reduc_idx >= 0)
3687 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3688 vectype_out, j);
3689 for (i = 0; i < nargs; i++)
3690 {
3691 op = gimple_call_arg (stmt, i);
3692 if (j == 0)
3693 {
3694 vec_defs.quick_push (vNULL);
3695 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3696 op, &vec_defs[i],
3697 vectypes[i]);
3698 }
3699 vargs[varg++] = vec_defs[i][j];
3700 }
3701 if (masked_loop_p && reduc_idx >= 0)
3702 vargs[varg++] = vargs[reduc_idx + 1];
3703
3704 if (mask_opno >= 0 && masked_loop_p)
3705 {
3706 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3707 vectype_out, j);
3708 vargs[mask_opno]
3709 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3710 vargs[mask_opno], gsi);
3711 }
3712
3713 gimple *new_stmt;
3714 if (cfn == CFN_GOMP_SIMD_LANE)
3715 {
3716 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3717 tree new_var
3718 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3719 gimple *init_stmt = gimple_build_assign (new_var, cst);
3720 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3721 new_temp = make_ssa_name (vec_dest);
3722 new_stmt = gimple_build_assign (new_temp, new_var);
3723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3724 }
3725 else if (modifier == NARROW)
3726 {
3727 /* We don't define any narrowing conditional functions at
3728 present. */
3729 gcc_assert (mask_opno < 0);
3730 tree half_res = make_ssa_name (vectype_in);
3731 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3732 gimple_call_set_lhs (call, half_res);
3733 gimple_call_set_nothrow (call, true);
3734 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3735 if ((j & 1) == 0)
3736 {
3737 prev_res = half_res;
3738 continue;
3739 }
3740 new_temp = make_ssa_name (vec_dest);
3741 new_stmt = gimple_build_assign (new_temp, convert_code,
3742 prev_res, half_res);
3743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3744 }
3745 else
3746 {
3747 gcall *call;
3748 if (ifn != IFN_LAST)
3749 call = gimple_build_call_internal_vec (ifn, vargs);
3750 else
3751 call = gimple_build_call_vec (fndecl, vargs);
3752 new_temp = make_ssa_name (vec_dest, call);
3753 gimple_call_set_lhs (call, new_temp);
3754 gimple_call_set_nothrow (call, true);
3755 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3756 new_stmt = call;
3757 }
3758
3759 if (j == (modifier == NARROW ? 1 : 0))
3760 *vec_stmt = new_stmt;
3761 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3762 }
3763 for (i = 0; i < nargs; i++)
3764 {
3765 vec<tree> vec_oprndsi = vec_defs[i];
3766 vec_oprndsi.release ();
3767 }
3768 }
3769 else if (modifier == NARROW)
3770 {
3771 auto_vec<vec<tree> > vec_defs (nargs);
3772 /* We don't define any narrowing conditional functions at present. */
3773 gcc_assert (mask_opno < 0);
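      /* Here the narrowing is performed by the target builtin itself:
	 each call is passed both input vectors for every scalar argument
	 and returns a single narrower result vector.  */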
3774 for (j = 0; j < ncopies; ++j)
3775 {
3776 /* Build argument list for the vectorized call. */
3777 if (j == 0)
3778 vargs.create (nargs * 2);
3779 else
3780 vargs.truncate (0);
3781
3782 if (slp_node)
3783 {
3784 vec<tree> vec_oprnds0;
3785
3786 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3787 vec_oprnds0 = vec_defs[0];
3788
3789 /* Arguments are ready. Create the new vector stmt. */
3790 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3791 {
3792 size_t k;
3793 vargs.truncate (0);
3794 for (k = 0; k < nargs; k++)
3795 {
3796 vec<tree> vec_oprndsk = vec_defs[k];
3797 vargs.quick_push (vec_oprndsk[i]);
3798 vargs.quick_push (vec_oprndsk[i + 1]);
3799 }
3800 gcall *call;
3801 if (ifn != IFN_LAST)
3802 call = gimple_build_call_internal_vec (ifn, vargs);
3803 else
3804 call = gimple_build_call_vec (fndecl, vargs);
3805 new_temp = make_ssa_name (vec_dest, call);
3806 gimple_call_set_lhs (call, new_temp);
3807 gimple_call_set_nothrow (call, true);
3808 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3809 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3810 }
3811 continue;
3812 }
3813
3814 for (i = 0; i < nargs; i++)
3815 {
3816 op = gimple_call_arg (stmt, i);
3817 if (j == 0)
3818 {
3819 vec_defs.quick_push (vNULL);
3820 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3821 op, &vec_defs[i], vectypes[i]);
3822 }
3823 vec_oprnd0 = vec_defs[i][2*j];
3824 vec_oprnd1 = vec_defs[i][2*j+1];
3825
3826 vargs.quick_push (vec_oprnd0);
3827 vargs.quick_push (vec_oprnd1);
3828 }
3829
3830 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3831 new_temp = make_ssa_name (vec_dest, new_stmt);
3832 gimple_call_set_lhs (new_stmt, new_temp);
3833 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3834
3835 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3836 }
3837
3838 if (!slp_node)
3839 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3840
3841 for (i = 0; i < nargs; i++)
3842 {
3843 vec<tree> vec_oprndsi = vec_defs[i];
3844 vec_oprndsi.release ();
3845 }
3846 }
3847 else
3848 /* No current target implements this case. */
3849 return false;
3850
3851 vargs.release ();
3852
3853 /* The call in STMT might prevent it from being removed in DCE.
3854 We however cannot remove it here, because of the way the SSA name
3855 it defines is mapped to the new definition. So just replace the
3856 rhs of the statement with something harmless. */
3857
3858 if (slp_node)
3859 return true;
3860
3861 stmt_info = vect_orig_stmt (stmt_info);
3862 lhs = gimple_get_lhs (stmt_info->stmt);
3863
3864 gassign *new_stmt
3865 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3866 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3867
3868 return true;
3869 }
3870
3871
3872 struct simd_call_arg_info
3873 {
3874 tree vectype;
3875 tree op;
3876 HOST_WIDE_INT linear_step;
3877 enum vect_def_type dt;
3878 unsigned int align;
3879 bool simd_lane_linear;
3880 };
3881
3882 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3883 is linear within simd lane (but not within whole loop), note it in
3884 *ARGINFO. */
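/* The pattern recognized below is roughly

     _1 = GOMP_SIMD_LANE (simduid);
     _2 = _1 * step;	(possibly through conversions, or with an
			 implicit step of 1)
     op = base p+ _2;

   i.e. OP advances by STEP for each SIMD lane.  The SSA names shown are
   only illustrative.  */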
3885
3886 static void
3887 vect_simd_lane_linear (tree op, class loop *loop,
3888 struct simd_call_arg_info *arginfo)
3889 {
3890 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3891
3892 if (!is_gimple_assign (def_stmt)
3893 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3894 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3895 return;
3896
3897 tree base = gimple_assign_rhs1 (def_stmt);
3898 HOST_WIDE_INT linear_step = 0;
3899 tree v = gimple_assign_rhs2 (def_stmt);
3900 while (TREE_CODE (v) == SSA_NAME)
3901 {
3902 tree t;
3903 def_stmt = SSA_NAME_DEF_STMT (v);
3904 if (is_gimple_assign (def_stmt))
3905 switch (gimple_assign_rhs_code (def_stmt))
3906 {
3907 case PLUS_EXPR:
3908 t = gimple_assign_rhs2 (def_stmt);
3909 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3910 return;
3911 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3912 v = gimple_assign_rhs1 (def_stmt);
3913 continue;
3914 case MULT_EXPR:
3915 t = gimple_assign_rhs2 (def_stmt);
3916 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3917 return;
3918 linear_step = tree_to_shwi (t);
3919 v = gimple_assign_rhs1 (def_stmt);
3920 continue;
3921 CASE_CONVERT:
3922 t = gimple_assign_rhs1 (def_stmt);
3923 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3924 || (TYPE_PRECISION (TREE_TYPE (v))
3925 < TYPE_PRECISION (TREE_TYPE (t))))
3926 return;
3927 if (!linear_step)
3928 linear_step = 1;
3929 v = t;
3930 continue;
3931 default:
3932 return;
3933 }
3934 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3935 && loop->simduid
3936 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3937 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3938 == loop->simduid))
3939 {
3940 if (!linear_step)
3941 linear_step = 1;
3942 arginfo->linear_step = linear_step;
3943 arginfo->op = base;
3944 arginfo->simd_lane_linear = true;
3945 return;
3946 }
3947 }
3948 }
3949
3950 /* Return the number of elements in vector type VECTYPE, which is associated
3951 with a SIMD clone. At present these vectors always have a constant
3952 length. */
3953
3954 static unsigned HOST_WIDE_INT
3955 simd_clone_subparts (tree vectype)
3956 {
3957 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3958 }
3959
3960 /* Function vectorizable_simd_clone_call.
3961
3962 Check if STMT_INFO performs a function call that can be vectorized
3963 by calling a simd clone of the function.
3964 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3965 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3966 Return true if STMT_INFO is vectorizable in this way. */
3967
3968 static bool
3969 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3970 gimple_stmt_iterator *gsi,
3971 gimple **vec_stmt, slp_tree slp_node,
3972 stmt_vector_for_cost *)
3973 {
3974 tree vec_dest;
3975 tree scalar_dest;
3976 tree op, type;
3977 tree vec_oprnd0 = NULL_TREE;
3978 tree vectype;
3979 poly_uint64 nunits;
3980 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3981 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3982 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3983 tree fndecl, new_temp;
3984 int ncopies, j;
3985 auto_vec<simd_call_arg_info> arginfo;
3986 vec<tree> vargs = vNULL;
3987 size_t i, nargs;
3988 tree lhs, rtype, ratype;
3989 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3990
3991 /* Is STMT a vectorizable call? */
3992 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3993 if (!stmt)
3994 return false;
3995
3996 fndecl = gimple_call_fndecl (stmt);
3997 if (fndecl == NULL_TREE)
3998 return false;
3999
4000 struct cgraph_node *node = cgraph_node::get (fndecl);
4001 if (node == NULL || node->simd_clones == NULL)
4002 return false;
4003
4004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4005 return false;
4006
4007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4008 && ! vec_stmt)
4009 return false;
4010
4011 if (gimple_call_lhs (stmt)
4012 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4013 return false;
4014
4015 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4016
4017 vectype = STMT_VINFO_VECTYPE (stmt_info);
4018
4019 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4020 return false;
4021
4022 /* FORNOW */
4023 if (slp_node)
4024 return false;
4025
4026 /* Process function arguments. */
4027 nargs = gimple_call_num_args (stmt);
4028
4029 /* Bail out if the function has zero arguments. */
4030 if (nargs == 0)
4031 return false;
4032
4033 arginfo.reserve (nargs, true);
4034
4035 for (i = 0; i < nargs; i++)
4036 {
4037 simd_call_arg_info thisarginfo;
4038 affine_iv iv;
4039
4040 thisarginfo.linear_step = 0;
4041 thisarginfo.align = 0;
4042 thisarginfo.op = NULL_TREE;
4043 thisarginfo.simd_lane_linear = false;
4044
4045 op = gimple_call_arg (stmt, i);
4046 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4047 &thisarginfo.vectype)
4048 || thisarginfo.dt == vect_uninitialized_def)
4049 {
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4052 "use not simple.\n");
4053 return false;
4054 }
4055
4056 if (thisarginfo.dt == vect_constant_def
4057 || thisarginfo.dt == vect_external_def)
4058 gcc_assert (thisarginfo.vectype == NULL_TREE);
4059 else
4060 {
4061 gcc_assert (thisarginfo.vectype != NULL_TREE);
4062 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4063 {
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4066 "vector mask arguments are not supported\n");
4067 return false;
4068 }
4069 }
4070
4071 /* For linear arguments, the analyze phase should have saved
4072 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4073 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4074 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4075 {
4076 gcc_assert (vec_stmt);
4077 thisarginfo.linear_step
4078 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4079 thisarginfo.op
4080 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4081 thisarginfo.simd_lane_linear
4082 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4083 == boolean_true_node);
4084 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
4085 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4086 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4087 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4088 {
4089 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4090 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4091 tree opt = TREE_TYPE (thisarginfo.op);
4092 bias = fold_convert (TREE_TYPE (step), bias);
4093 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4094 thisarginfo.op
4095 = fold_build2 (POINTER_TYPE_P (opt)
4096 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4097 thisarginfo.op, bias);
4098 }
4099 }
4100 else if (!vec_stmt
4101 && thisarginfo.dt != vect_constant_def
4102 && thisarginfo.dt != vect_external_def
4103 && loop_vinfo
4104 && TREE_CODE (op) == SSA_NAME
4105 && simple_iv (loop, loop_containing_stmt (stmt), op,
4106 &iv, false)
4107 && tree_fits_shwi_p (iv.step))
4108 {
4109 thisarginfo.linear_step = tree_to_shwi (iv.step);
4110 thisarginfo.op = iv.base;
4111 }
4112 else if ((thisarginfo.dt == vect_constant_def
4113 || thisarginfo.dt == vect_external_def)
4114 && POINTER_TYPE_P (TREE_TYPE (op)))
4115 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4116 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4117 linear too. */
4118 if (POINTER_TYPE_P (TREE_TYPE (op))
4119 && !thisarginfo.linear_step
4120 && !vec_stmt
4121 && thisarginfo.dt != vect_constant_def
4122 && thisarginfo.dt != vect_external_def
4123 && loop_vinfo
4124 && !slp_node
4125 && TREE_CODE (op) == SSA_NAME)
4126 vect_simd_lane_linear (op, loop, &thisarginfo);
4127
4128 arginfo.quick_push (thisarginfo);
4129 }
4130
4131 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4132 if (!vf.is_constant ())
4133 {
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "not considering SIMD clones; not yet supported"
4137 " for variable-width vectors.\n");
4138 return false;
4139 }
4140
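  /* Choose the simd clone with the lowest "badness": prefer clones whose
     simdlen covers the vectorization factor with as few calls as possible,
     that are not in-branch, that the target rates as cheap, and whose
     per-argument kinds match how the arguments are actually used
     (vector, uniform or linear).  */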
4141 unsigned int badness = 0;
4142 struct cgraph_node *bestn = NULL;
4143 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4144 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4145 else
4146 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4147 n = n->simdclone->next_clone)
4148 {
4149 unsigned int this_badness = 0;
4150 unsigned int num_calls;
4151 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4152 || n->simdclone->nargs != nargs)
4153 continue;
4154 if (num_calls != 1)
4155 this_badness += exact_log2 (num_calls) * 4096;
4156 if (n->simdclone->inbranch)
4157 this_badness += 8192;
4158 int target_badness = targetm.simd_clone.usable (n);
4159 if (target_badness < 0)
4160 continue;
4161 this_badness += target_badness * 512;
4162 /* FORNOW: Have to add code to add the mask argument. */
4163 if (n->simdclone->inbranch)
4164 continue;
4165 for (i = 0; i < nargs; i++)
4166 {
4167 switch (n->simdclone->args[i].arg_type)
4168 {
4169 case SIMD_CLONE_ARG_TYPE_VECTOR:
4170 if (!useless_type_conversion_p
4171 (n->simdclone->args[i].orig_type,
4172 TREE_TYPE (gimple_call_arg (stmt, i))))
4173 i = -1;
4174 else if (arginfo[i].dt == vect_constant_def
4175 || arginfo[i].dt == vect_external_def
4176 || arginfo[i].linear_step)
4177 this_badness += 64;
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4180 if (arginfo[i].dt != vect_constant_def
4181 && arginfo[i].dt != vect_external_def)
4182 i = -1;
4183 break;
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4186 if (arginfo[i].dt == vect_constant_def
4187 || arginfo[i].dt == vect_external_def
4188 || (arginfo[i].linear_step
4189 != n->simdclone->args[i].linear_step))
4190 i = -1;
4191 break;
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4198 /* FORNOW */
4199 i = -1;
4200 break;
4201 case SIMD_CLONE_ARG_TYPE_MASK:
4202 gcc_unreachable ();
4203 }
4204 if (i == (size_t) -1)
4205 break;
4206 if (n->simdclone->args[i].alignment > arginfo[i].align)
4207 {
4208 i = -1;
4209 break;
4210 }
4211 if (arginfo[i].align)
4212 this_badness += (exact_log2 (arginfo[i].align)
4213 - exact_log2 (n->simdclone->args[i].alignment));
4214 }
4215 if (i == (size_t) -1)
4216 continue;
4217 if (bestn == NULL || this_badness < badness)
4218 {
4219 bestn = n;
4220 badness = this_badness;
4221 }
4222 }
4223
4224 if (bestn == NULL)
4225 return false;
4226
4227 for (i = 0; i < nargs; i++)
4228 if ((arginfo[i].dt == vect_constant_def
4229 || arginfo[i].dt == vect_external_def)
4230 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4231 {
4232 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4233 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4234 slp_node);
4235 if (arginfo[i].vectype == NULL
4236 || !constant_multiple_p (bestn->simdclone->simdlen,
4237 simd_clone_subparts (arginfo[i].vectype)))
4238 return false;
4239 }
4240
4241 fndecl = bestn->decl;
4242 nunits = bestn->simdclone->simdlen;
4243 ncopies = vector_unroll_factor (vf, nunits);
4244
4245 /* If the function isn't const, only allow it in simd loops where the
4246 user has asserted that at least nunits consecutive iterations can be
4247 performed using SIMD instructions. */
4248 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4249 && gimple_vuse (stmt))
4250 return false;
4251
4252 /* Sanity check: make sure that at least one copy of the vectorized stmt
4253 needs to be generated. */
4254 gcc_assert (ncopies >= 1);
4255
4256 if (!vec_stmt) /* transformation not required. */
4257 {
4258 /* When the original call is pure or const but the SIMD ABI dictates
4259 an aggregate return we will have to use a virtual definition and
4260 in a loop eventually even need to add a virtual PHI. That's
4261 not straightforward, so allow this to be fixed up via renaming. */
4262 if (gimple_call_lhs (stmt)
4263 && !gimple_vdef (stmt)
4264 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4265 vinfo->any_known_not_updated_vssa = true;
4266 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4267 for (i = 0; i < nargs; i++)
4268 if ((bestn->simdclone->args[i].arg_type
4269 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4270 || (bestn->simdclone->args[i].arg_type
4271 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4272 {
4273 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4274 + 1,
4275 true);
4276 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4277 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4278 ? size_type_node : TREE_TYPE (arginfo[i].op);
4279 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4280 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4281 tree sll = arginfo[i].simd_lane_linear
4282 ? boolean_true_node : boolean_false_node;
4283 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4284 }
4285 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4286 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4287 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4288 dt, slp_node, cost_vec); */
4289 return true;
4290 }
4291
4292 /* Transform. */
4293
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4296
4297 /* Handle def. */
4298 scalar_dest = gimple_call_lhs (stmt);
4299 vec_dest = NULL_TREE;
4300 rtype = NULL_TREE;
4301 ratype = NULL_TREE;
4302 if (scalar_dest)
4303 {
4304 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4305 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4306 if (TREE_CODE (rtype) == ARRAY_TYPE)
4307 {
4308 ratype = rtype;
4309 rtype = TREE_TYPE (ratype);
4310 }
4311 }
4312
4313 auto_vec<vec<tree> > vec_oprnds;
4314 auto_vec<unsigned> vec_oprnds_i;
4315 vec_oprnds.safe_grow_cleared (nargs, true);
4316 vec_oprnds_i.safe_grow_cleared (nargs, true);
4317 for (j = 0; j < ncopies; ++j)
4318 {
4319 /* Build argument list for the vectorized call. */
4320 if (j == 0)
4321 vargs.create (nargs);
4322 else
4323 vargs.truncate (0);
4324
4325 for (i = 0; i < nargs; i++)
4326 {
4327 unsigned int k, l, m, o;
4328 tree atype;
4329 op = gimple_call_arg (stmt, i);
4330 switch (bestn->simdclone->args[i].arg_type)
4331 {
4332 case SIMD_CLONE_ARG_TYPE_VECTOR:
4333 atype = bestn->simdclone->args[i].vector_type;
4334 o = vector_unroll_factor (nunits,
4335 simd_clone_subparts (atype));
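	      /* The clone may take this argument in narrower or wider
		 vectors than the vectorized operand: either extract
		 sub-vectors with BIT_FIELD_REF or glue several operands
		 together with a CONSTRUCTOR (or a VIEW_CONVERT_EXPR when
		 only the representation differs).  */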
4336 for (m = j * o; m < (j + 1) * o; m++)
4337 {
4338 if (simd_clone_subparts (atype)
4339 < simd_clone_subparts (arginfo[i].vectype))
4340 {
4341 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4342 k = (simd_clone_subparts (arginfo[i].vectype)
4343 / simd_clone_subparts (atype));
4344 gcc_assert ((k & (k - 1)) == 0);
4345 if (m == 0)
4346 {
4347 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4348 ncopies * o / k, op,
4349 &vec_oprnds[i]);
4350 vec_oprnds_i[i] = 0;
4351 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4352 }
4353 else
4354 {
4355 vec_oprnd0 = arginfo[i].op;
4356 if ((m & (k - 1)) == 0)
4357 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4358 }
4359 arginfo[i].op = vec_oprnd0;
4360 vec_oprnd0
4361 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4362 bitsize_int (prec),
4363 bitsize_int ((m & (k - 1)) * prec));
4364 gassign *new_stmt
4365 = gimple_build_assign (make_ssa_name (atype),
4366 vec_oprnd0);
4367 vect_finish_stmt_generation (vinfo, stmt_info,
4368 new_stmt, gsi);
4369 vargs.safe_push (gimple_assign_lhs (new_stmt));
4370 }
4371 else
4372 {
4373 k = (simd_clone_subparts (atype)
4374 / simd_clone_subparts (arginfo[i].vectype));
4375 gcc_assert ((k & (k - 1)) == 0);
4376 vec<constructor_elt, va_gc> *ctor_elts;
4377 if (k != 1)
4378 vec_alloc (ctor_elts, k);
4379 else
4380 ctor_elts = NULL;
4381 for (l = 0; l < k; l++)
4382 {
4383 if (m == 0 && l == 0)
4384 {
4385 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4386 k * o * ncopies,
4387 op,
4388 &vec_oprnds[i]);
4389 vec_oprnds_i[i] = 0;
4390 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4391 }
4392 else
4393 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4394 arginfo[i].op = vec_oprnd0;
4395 if (k == 1)
4396 break;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4398 vec_oprnd0);
4399 }
4400 if (k == 1)
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4402 atype))
4403 {
4404 vec_oprnd0
4405 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4406 gassign *new_stmt
4407 = gimple_build_assign (make_ssa_name (atype),
4408 vec_oprnd0);
4409 vect_finish_stmt_generation (vinfo, stmt_info,
4410 new_stmt, gsi);
4411 vargs.safe_push (gimple_assign_lhs (new_stmt));
4412 }
4413 else
4414 vargs.safe_push (vec_oprnd0);
4415 else
4416 {
4417 vec_oprnd0 = build_constructor (atype, ctor_elts);
4418 gassign *new_stmt
4419 = gimple_build_assign (make_ssa_name (atype),
4420 vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info,
4422 new_stmt, gsi);
4423 vargs.safe_push (gimple_assign_lhs (new_stmt));
4424 }
4425 }
4426 }
4427 break;
4428 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4429 vargs.safe_push (op);
4430 break;
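	    /* For linear arguments the clone expects the value at the start
	       of each chunk of SIMDLEN lanes.  For the first copy, build a
	       PHI in the loop header that advances by
	       STEP * SIMDLEN * NCOPIES per loop iteration; later copies add
	       the appropriate multiple of the step to that PHI result.  */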
4431 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4432 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4433 if (j == 0)
4434 {
4435 gimple_seq stmts;
4436 arginfo[i].op
4437 = force_gimple_operand (unshare_expr (arginfo[i].op),
4438 &stmts, true, NULL_TREE);
4439 if (stmts != NULL)
4440 {
4441 basic_block new_bb;
4442 edge pe = loop_preheader_edge (loop);
4443 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4444 gcc_assert (!new_bb);
4445 }
4446 if (arginfo[i].simd_lane_linear)
4447 {
4448 vargs.safe_push (arginfo[i].op);
4449 break;
4450 }
4451 tree phi_res = copy_ssa_name (op);
4452 gphi *new_phi = create_phi_node (phi_res, loop->header);
4453 add_phi_arg (new_phi, arginfo[i].op,
4454 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4455 enum tree_code code
4456 = POINTER_TYPE_P (TREE_TYPE (op))
4457 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4458 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4459 ? sizetype : TREE_TYPE (op);
4460 poly_widest_int cst
4461 = wi::mul (bestn->simdclone->args[i].linear_step,
4462 ncopies * nunits);
4463 tree tcst = wide_int_to_tree (type, cst);
4464 tree phi_arg = copy_ssa_name (op);
4465 gassign *new_stmt
4466 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4467 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4468 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4469 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4470 UNKNOWN_LOCATION);
4471 arginfo[i].op = phi_res;
4472 vargs.safe_push (phi_res);
4473 }
4474 else
4475 {
4476 enum tree_code code
4477 = POINTER_TYPE_P (TREE_TYPE (op))
4478 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4479 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4480 ? sizetype : TREE_TYPE (op);
4481 poly_widest_int cst
4482 = wi::mul (bestn->simdclone->args[i].linear_step,
4483 j * nunits);
4484 tree tcst = wide_int_to_tree (type, cst);
4485 new_temp = make_ssa_name (TREE_TYPE (op));
4486 gassign *new_stmt
4487 = gimple_build_assign (new_temp, code,
4488 arginfo[i].op, tcst);
4489 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4490 vargs.safe_push (new_temp);
4491 }
4492 break;
4493 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4494 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4495 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4496 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4497 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4498 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4499 default:
4500 gcc_unreachable ();
4501 }
4502 }
4503
4504 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4505 if (vec_dest)
4506 {
4507 gcc_assert (ratype
4508 || known_eq (simd_clone_subparts (rtype), nunits));
4509 if (ratype)
4510 new_temp = create_tmp_var (ratype);
4511 else if (useless_type_conversion_p (vectype, rtype))
4512 new_temp = make_ssa_name (vec_dest, new_call);
4513 else
4514 new_temp = make_ssa_name (rtype, new_call);
4515 gimple_call_set_lhs (new_call, new_temp);
4516 }
4517 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4518 gimple *new_stmt = new_call;
4519
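      /* Massage the clone's return value into vectors of VECTYPE: split a
	 wider (or array) return value into several pieces, collect several
	 narrower return values into a CONSTRUCTOR, or simply view-convert
	 when only the vector representation differs.  */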
4520 if (vec_dest)
4521 {
4522 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4523 {
4524 unsigned int k, l;
4525 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4526 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4527 k = vector_unroll_factor (nunits,
4528 simd_clone_subparts (vectype));
4529 gcc_assert ((k & (k - 1)) == 0);
4530 for (l = 0; l < k; l++)
4531 {
4532 tree t;
4533 if (ratype)
4534 {
4535 t = build_fold_addr_expr (new_temp);
4536 t = build2 (MEM_REF, vectype, t,
4537 build_int_cst (TREE_TYPE (t), l * bytes));
4538 }
4539 else
4540 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4541 bitsize_int (prec), bitsize_int (l * prec));
4542 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4543 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4544
4545 if (j == 0 && l == 0)
4546 *vec_stmt = new_stmt;
4547 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4548 }
4549
4550 if (ratype)
4551 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4552 continue;
4553 }
4554 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4555 {
4556 unsigned int k = (simd_clone_subparts (vectype)
4557 / simd_clone_subparts (rtype));
4558 gcc_assert ((k & (k - 1)) == 0);
4559 if ((j & (k - 1)) == 0)
4560 vec_alloc (ret_ctor_elts, k);
4561 if (ratype)
4562 {
4563 unsigned int m, o;
4564 o = vector_unroll_factor (nunits,
4565 simd_clone_subparts (rtype));
4566 for (m = 0; m < o; m++)
4567 {
4568 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4569 size_int (m), NULL_TREE, NULL_TREE);
4570 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4571 tem);
4572 vect_finish_stmt_generation (vinfo, stmt_info,
4573 new_stmt, gsi);
4574 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4575 gimple_assign_lhs (new_stmt));
4576 }
4577 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4578 }
4579 else
4580 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4581 if ((j & (k - 1)) != k - 1)
4582 continue;
4583 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4584 new_stmt
4585 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4586 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4587
4588 if ((unsigned) j == k - 1)
4589 *vec_stmt = new_stmt;
4590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4591 continue;
4592 }
4593 else if (ratype)
4594 {
4595 tree t = build_fold_addr_expr (new_temp);
4596 t = build2 (MEM_REF, vectype, t,
4597 build_int_cst (TREE_TYPE (t), 0));
4598 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4599 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4600 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4601 }
4602 else if (!useless_type_conversion_p (vectype, rtype))
4603 {
4604 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4605 new_stmt
4606 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4607 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4608 }
4609 }
4610
4611 if (j == 0)
4612 *vec_stmt = new_stmt;
4613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4614 }
4615
4616 for (i = 0; i < nargs; ++i)
4617 {
4618 vec<tree> oprndsi = vec_oprnds[i];
4619 oprndsi.release ();
4620 }
4621 vargs.release ();
4622
4623 /* The call in STMT might prevent it from being removed in DCE.
4624 We however cannot remove it here, because of the way the SSA name
4625 it defines is mapped to the new definition. So just replace the
4626 rhs of the statement with something harmless. */
4627
4628 if (slp_node)
4629 return true;
4630
4631 gimple *new_stmt;
4632 if (scalar_dest)
4633 {
4634 type = TREE_TYPE (scalar_dest);
4635 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4636 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4637 }
4638 else
4639 new_stmt = gimple_build_nop ();
4640 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4641 unlink_stmt_vdef (stmt);
4642
4643 return true;
4644 }
4645
4646
4647 /* Function vect_gen_widened_results_half
4648
4649 Create a vector stmt whose code is CODE, whose number of operands is
4650 OP_TYPE, and whose result variable is VEC_DEST; its arguments are
4651 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4652 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4653 needs to be created (DECL is a function-decl of a target-builtin).
4654 STMT_INFO is the original scalar stmt that we are vectorizing. */
4655
4656 static gimple *
4657 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4658 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4659 tree vec_dest, gimple_stmt_iterator *gsi,
4660 stmt_vec_info stmt_info)
4661 {
4662 gimple *new_stmt;
4663 tree new_temp;
4664
4665 /* Generate half of the widened result: */
4666 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4667 if (op_type != binary_op)
4668 vec_oprnd1 = NULL;
4669 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4670 new_temp = make_ssa_name (vec_dest, new_stmt);
4671 gimple_assign_set_lhs (new_stmt, new_temp);
4672 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4673
4674 return new_stmt;
4675 }
4676
4677
4678 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4679 For multi-step conversions store the resulting vectors and call the function
4680 recursively. */
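/* For example, narrowing four V4SI vectors into one V16QI vector is done
   in two rounds: the four SImode vectors are first packed pairwise into
   two V8HI vectors, which are then packed into the final V16QI vector.  */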
4681
4682 static void
4683 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4684 int multi_step_cvt,
4685 stmt_vec_info stmt_info,
4686 vec<tree> &vec_dsts,
4687 gimple_stmt_iterator *gsi,
4688 slp_tree slp_node, enum tree_code code)
4689 {
4690 unsigned int i;
4691 tree vop0, vop1, new_tmp, vec_dest;
4692
4693 vec_dest = vec_dsts.pop ();
4694
4695 for (i = 0; i < vec_oprnds->length (); i += 2)
4696 {
4697 /* Create demotion operation. */
4698 vop0 = (*vec_oprnds)[i];
4699 vop1 = (*vec_oprnds)[i + 1];
4700 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4701 new_tmp = make_ssa_name (vec_dest, new_stmt);
4702 gimple_assign_set_lhs (new_stmt, new_tmp);
4703 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4704
4705 if (multi_step_cvt)
4706 /* Store the resulting vector for the next recursive call. */
4707 (*vec_oprnds)[i/2] = new_tmp;
4708 else
4709 {
4710 /* This is the last step of the conversion sequence. Store the
4711 vectors in SLP_NODE or in vector info of the scalar statement
4712 (or in STMT_VINFO_RELATED_STMT chain). */
4713 if (slp_node)
4714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4715 else
4716 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4717 }
4718 }
4719
4720 /* For multi-step demotion operations we first generate demotion operations
4721 from the source type to the intermediate types, and then combine the
4722 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4723 type. */
4724 if (multi_step_cvt)
4725 {
4726 /* At each level of recursion we have half of the operands we had at the
4727 previous level. */
4728 vec_oprnds->truncate ((i+1)/2);
4729 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4730 multi_step_cvt - 1,
4731 stmt_info, vec_dsts, gsi,
4732 slp_node, VEC_PACK_TRUNC_EXPR);
4733 }
4734
4735 vec_dsts.quick_push (vec_dest);
4736 }
4737
4738
4739 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4740 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4741 STMT_INFO. For multi-step conversions store the resulting vectors and
4742 call the function recursively. */
4743
4744 static void
4745 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4746 vec<tree> *vec_oprnds0,
4747 vec<tree> *vec_oprnds1,
4748 stmt_vec_info stmt_info, tree vec_dest,
4749 gimple_stmt_iterator *gsi,
4750 enum tree_code code1,
4751 enum tree_code code2, int op_type)
4752 {
4753 int i;
4754 tree vop0, vop1, new_tmp1, new_tmp2;
4755 gimple *new_stmt1, *new_stmt2;
4756 vec<tree> vec_tmp = vNULL;
4757
4758 vec_tmp.create (vec_oprnds0->length () * 2);
4759 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4760 {
4761 if (op_type == binary_op)
4762 vop1 = (*vec_oprnds1)[i];
4763 else
4764 vop1 = NULL_TREE;
4765
4766 /* Generate the two halves of the promotion operation. */
4767 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4768 op_type, vec_dest, gsi,
4769 stmt_info);
4770 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4771 op_type, vec_dest, gsi,
4772 stmt_info);
4773 if (is_gimple_call (new_stmt1))
4774 {
4775 new_tmp1 = gimple_call_lhs (new_stmt1);
4776 new_tmp2 = gimple_call_lhs (new_stmt2);
4777 }
4778 else
4779 {
4780 new_tmp1 = gimple_assign_lhs (new_stmt1);
4781 new_tmp2 = gimple_assign_lhs (new_stmt2);
4782 }
4783
4784 /* Store the results for the next step. */
4785 vec_tmp.quick_push (new_tmp1);
4786 vec_tmp.quick_push (new_tmp2);
4787 }
4788
4789 vec_oprnds0->release ();
4790 *vec_oprnds0 = vec_tmp;
4791 }
4792
4793 /* Create vectorized promotion stmts for widening stmts using only half the
4794 potential vector size for input. */
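/* E.g. a WIDEN_PLUS_EXPR whose operands fill only half a vector (say V2SI
   inputs with a V2DI result) is emitted as NOP_EXPR widenings of the
   inputs to the output vector type followed by the corresponding
   non-widening operation (a plain PLUS_EXPR in that case).  */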
4795 static void
4796 vect_create_half_widening_stmts (vec_info *vinfo,
4797 vec<tree> *vec_oprnds0,
4798 vec<tree> *vec_oprnds1,
4799 stmt_vec_info stmt_info, tree vec_dest,
4800 gimple_stmt_iterator *gsi,
4801 enum tree_code code1,
4802 int op_type)
4803 {
4804 int i;
4805 tree vop0, vop1;
4806 gimple *new_stmt1;
4807 gimple *new_stmt2;
4808 gimple *new_stmt3;
4809 vec<tree> vec_tmp = vNULL;
4810
4811 vec_tmp.create (vec_oprnds0->length ());
4812 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4813 {
4814 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4815
4816 gcc_assert (op_type == binary_op);
4817 vop1 = (*vec_oprnds1)[i];
4818
4819 /* Widen the first vector input. */
4820 out_type = TREE_TYPE (vec_dest);
4821 new_tmp1 = make_ssa_name (out_type);
4822 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4824 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4825 {
4826 /* Widen the second vector input. */
4827 new_tmp2 = make_ssa_name (out_type);
4828 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4830 /* Perform the operation with both vector inputs widened. */
4831 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4832 }
4833 else
4834 {
4835 /* Perform the operation with the single vector input widened. */
4836 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4837 }
4838
4839 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4840 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4842
4843 /* Store the results for the next step. */
4844 vec_tmp.quick_push (new_tmp3);
4845 }
4846
4847 vec_oprnds0->release ();
4848 *vec_oprnds0 = vec_tmp;
4849 }
4850
4851
4852 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4853 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4854 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4855 Return true if STMT_INFO is vectorizable in this way. */
4856
4857 static bool
4858 vectorizable_conversion (vec_info *vinfo,
4859 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node,
4861 stmt_vector_for_cost *cost_vec)
4862 {
4863 tree vec_dest;
4864 tree scalar_dest;
4865 tree op0, op1 = NULL_TREE;
4866 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4867 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4868 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4869 tree new_temp;
4870 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4871 int ndts = 2;
4872 poly_uint64 nunits_in;
4873 poly_uint64 nunits_out;
4874 tree vectype_out, vectype_in;
4875 int ncopies, i;
4876 tree lhs_type, rhs_type;
4877 enum { NARROW, NONE, WIDEN } modifier;
4878 vec<tree> vec_oprnds0 = vNULL;
4879 vec<tree> vec_oprnds1 = vNULL;
4880 tree vop0;
4881 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4882 int multi_step_cvt = 0;
4883 vec<tree> interm_types = vNULL;
4884 tree intermediate_type, cvt_type = NULL_TREE;
4885 int op_type;
4886 unsigned short fltsz;
4887
4888 /* Is STMT a vectorizable conversion? */
4889
4890 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4891 return false;
4892
4893 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4894 && ! vec_stmt)
4895 return false;
4896
4897 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4898 if (!stmt)
4899 return false;
4900
4901 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4902 return false;
4903
4904 code = gimple_assign_rhs_code (stmt);
4905 if (!CONVERT_EXPR_CODE_P (code)
4906 && code != FIX_TRUNC_EXPR
4907 && code != FLOAT_EXPR
4908 && code != WIDEN_PLUS_EXPR
4909 && code != WIDEN_MINUS_EXPR
4910 && code != WIDEN_MULT_EXPR
4911 && code != WIDEN_LSHIFT_EXPR)
4912 return false;
4913
4914 bool widen_arith = (code == WIDEN_PLUS_EXPR
4915 || code == WIDEN_MINUS_EXPR
4916 || code == WIDEN_MULT_EXPR
4917 || code == WIDEN_LSHIFT_EXPR);
4918 op_type = TREE_CODE_LENGTH (code);
4919
4920 /* Check types of lhs and rhs. */
4921 scalar_dest = gimple_assign_lhs (stmt);
4922 lhs_type = TREE_TYPE (scalar_dest);
4923 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4924
4925 /* Check the operands of the operation. */
4926 slp_tree slp_op0, slp_op1 = NULL;
4927 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4928 0, &op0, &slp_op0, &dt[0], &vectype_in))
4929 {
4930 if (dump_enabled_p ())
4931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4932 "use not simple.\n");
4933 return false;
4934 }
4935
4936 rhs_type = TREE_TYPE (op0);
4937 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4938 && !((INTEGRAL_TYPE_P (lhs_type)
4939 && INTEGRAL_TYPE_P (rhs_type))
4940 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4941 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4942 return false;
4943
4944 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4945 && ((INTEGRAL_TYPE_P (lhs_type)
4946 && !type_has_mode_precision_p (lhs_type))
4947 || (INTEGRAL_TYPE_P (rhs_type)
4948 && !type_has_mode_precision_p (rhs_type))))
4949 {
4950 if (dump_enabled_p ())
4951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4952 "type conversion to/from bit-precision unsupported."
4953 "\n");
4954 return false;
4955 }
4956
4957 if (op_type == binary_op)
4958 {
4959 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4960 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4961
4962 op1 = gimple_assign_rhs2 (stmt);
4963 tree vectype1_in;
4964 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4965 &op1, &slp_op1, &dt[1], &vectype1_in))
4966 {
4967 if (dump_enabled_p ())
4968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4969 "use not simple.\n");
4970 return false;
4971 }
4972 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4973 OP1. */
4974 if (!vectype_in)
4975 vectype_in = vectype1_in;
4976 }
4977
4978 /* If op0 is an external or constant def, infer the vector type
4979 from the scalar type. */
4980 if (!vectype_in)
4981 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4982 if (vec_stmt)
4983 gcc_assert (vectype_in);
4984 if (!vectype_in)
4985 {
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4988 "no vectype for scalar type %T\n", rhs_type);
4989
4990 return false;
4991 }
4992
4993 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4994 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4995 {
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4998 "can't convert between boolean and non "
4999 "boolean vectors %T\n", rhs_type);
5000
5001 return false;
5002 }
5003
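  /* Classify the conversion from the relative lane counts: more lanes in
     the output than in the input is a narrowing conversion, fewer lanes
     is a widening one, and equal counts are either a straight conversion
     (NONE) or, for the widening arithmetic codes, the half-vector
     widening case (WIDEN).  */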
5004 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5005 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5006 if (known_eq (nunits_out, nunits_in))
5007 if (widen_arith)
5008 modifier = WIDEN;
5009 else
5010 modifier = NONE;
5011 else if (multiple_p (nunits_out, nunits_in))
5012 modifier = NARROW;
5013 else
5014 {
5015 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5016 modifier = WIDEN;
5017 }
5018
5019 /* Multiple types in SLP are handled by creating the appropriate number of
5020 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5021 case of SLP. */
5022 if (slp_node)
5023 ncopies = 1;
5024 else if (modifier == NARROW)
5025 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5026 else
5027 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5028
5029 /* Sanity check: make sure that at least one copy of the vectorized stmt
5030 needs to be generated. */
5031 gcc_assert (ncopies >= 1);
5032
5033 bool found_mode = false;
5034 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5035 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5036 opt_scalar_mode rhs_mode_iter;
5037
5038 /* Supportable by target? */
5039 switch (modifier)
5040 {
5041 case NONE:
5042 if (code != FIX_TRUNC_EXPR
5043 && code != FLOAT_EXPR
5044 && !CONVERT_EXPR_CODE_P (code))
5045 return false;
5046 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5047 break;
5048 /* FALLTHRU */
5049 unsupported:
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5052 "conversion not supported by target.\n");
5053 return false;
5054
5055 case WIDEN:
5056 if (known_eq (nunits_in, nunits_out))
5057 {
5058 if (!supportable_half_widening_operation (code, vectype_out,
5059 vectype_in, &code1))
5060 goto unsupported;
5061 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5062 break;
5063 }
5064 if (supportable_widening_operation (vinfo, code, stmt_info,
5065 vectype_out, vectype_in, &code1,
5066 &code2, &multi_step_cvt,
5067 &interm_types))
5068 {
5069 /* A binary widening operation can only be supported directly by the
5070 architecture. */
5071 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5072 break;
5073 }
5074
5075 if (code != FLOAT_EXPR
5076 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5077 goto unsupported;
5078
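/* A FLOAT_EXPR whose result is wider than the integer source is split
   into an integer widening step followed by an int->float conversion of
   the wider size; search the 2x-wider integer modes for the narrowest
   intermediate mode for which both steps are supported.  */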
5079 fltsz = GET_MODE_SIZE (lhs_mode);
5080 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5081 {
5082 rhs_mode = rhs_mode_iter.require ();
5083 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5084 break;
5085
5086 cvt_type
5087 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5088 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5089 if (cvt_type == NULL_TREE)
5090 goto unsupported;
5091
5092 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5093 {
5094 if (!supportable_convert_operation (code, vectype_out,
5095 cvt_type, &codecvt1))
5096 goto unsupported;
5097 }
5098 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5099 vectype_out, cvt_type,
5100 &codecvt1, &codecvt2,
5101 &multi_step_cvt,
5102 &interm_types))
5103 continue;
5104 else
5105 gcc_assert (multi_step_cvt == 0);
5106
5107 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5108 cvt_type,
5109 vectype_in, &code1, &code2,
5110 &multi_step_cvt, &interm_types))
5111 {
5112 found_mode = true;
5113 break;
5114 }
5115 }
5116
5117 if (!found_mode)
5118 goto unsupported;
5119
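/* If the search stopped at an intermediate integer as wide as the float
   result, the final int->float step is a direct conversion and CODECVT2
   is cleared; otherwise record CVT_TYPE as one more intermediate type in
   the multi-step chain.  */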
5120 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5121 codecvt2 = ERROR_MARK;
5122 else
5123 {
5124 multi_step_cvt++;
5125 interm_types.safe_push (cvt_type);
5126 cvt_type = NULL_TREE;
5127 }
5128 break;
5129
5130 case NARROW:
5131 gcc_assert (op_type == unary_op);
5132 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5133 &code1, &multi_step_cvt,
5134 &interm_types))
5135 break;
5136
5137 if (code != FIX_TRUNC_EXPR
5138 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5139 goto unsupported;
5140
5141 cvt_type
5142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5143 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5144 if (cvt_type == NULL_TREE)
5145 goto unsupported;
5146 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5147 &codecvt1))
5148 goto unsupported;
5149 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5150 &code1, &multi_step_cvt,
5151 &interm_types))
5152 break;
5153 goto unsupported;
5154
5155 default:
5156 gcc_unreachable ();
5157 }
5158
5159 if (!vec_stmt) /* transformation not required. */
5160 {
5161 if (slp_node
5162 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5163 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5164 {
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5167 "incompatible vector types for invariants\n");
5168 return false;
5169 }
5170 DUMP_VECT_SCOPE ("vectorizable_conversion");
5171 if (modifier == NONE)
5172 {
5173 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5174 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5175 cost_vec);
5176 }
5177 else if (modifier == NARROW)
5178 {
5179 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5180 /* The final packing step produces one vector result per copy. */
5181 unsigned int nvectors
5182 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5183 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5184 multi_step_cvt, cost_vec,
5185 widen_arith);
5186 }
5187 else
5188 {
5189 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5190 /* The initial unpacking step produces two vector results
5191 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5192 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5193 unsigned int nvectors
5194 = (slp_node
5195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5196 : ncopies * 2);
5197 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5198 multi_step_cvt, cost_vec,
5199 widen_arith);
5200 }
5201 interm_types.release ();
5202 return true;
5203 }
5204
5205 /* Transform. */
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE, vect_location,
5208 "transform conversion. ncopies = %d.\n", ncopies);
5209
5210 if (op_type == binary_op)
5211 {
5212 if (CONSTANT_CLASS_P (op0))
5213 op0 = fold_convert (TREE_TYPE (op1), op0);
5214 else if (CONSTANT_CLASS_P (op1))
5215 op1 = fold_convert (TREE_TYPE (op0), op1);
5216 }
5217
5218 /* In case of multi-step conversion, we first generate conversion operations
5219 to the intermediate types, and then from those types to the final one.
5220 We create vector destinations for the intermediate type (TYPES) received
5221 from supportable_*_operation, and store them in the correct order
5222 for future use in vect_create_vectorized_*_stmts (). */
5223 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5224 vec_dest = vect_create_destination_var (scalar_dest,
5225 (cvt_type && modifier == WIDEN)
5226 ? cvt_type : vectype_out);
5227 vec_dsts.quick_push (vec_dest);
5228
5229 if (multi_step_cvt)
5230 {
5231 for (i = interm_types.length () - 1;
5232 interm_types.iterate (i, &intermediate_type); i--)
5233 {
5234 vec_dest = vect_create_destination_var (scalar_dest,
5235 intermediate_type);
5236 vec_dsts.quick_push (vec_dest);
5237 }
5238 }
5239
5240 if (cvt_type)
5241 vec_dest = vect_create_destination_var (scalar_dest,
5242 modifier == WIDEN
5243 ? vectype_out : cvt_type);
5244
5245 int ninputs = 1;
5246 if (!slp_node)
5247 {
5248 if (modifier == WIDEN)
5249 ;
5250 else if (modifier == NARROW)
5251 {
5252 if (multi_step_cvt)
5253 ninputs = vect_pow2 (multi_step_cvt);
5254 ninputs *= 2;
5255 }
5256 }
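/* For a non-SLP NARROW conversion each generated copy packs
   2 * 2^MULTI_STEP_CVT input vectors into one result, so that many defs
   are fetched per copy below; WIDEN and NONE consume one input vector
   per copy.  */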
5257
5258 switch (modifier)
5259 {
5260 case NONE:
5261 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5262 op0, &vec_oprnds0);
5263 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5264 {
5265 /* Arguments are ready, create the new vector stmt. */
5266 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5267 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5268 new_temp = make_ssa_name (vec_dest, new_stmt);
5269 gimple_assign_set_lhs (new_stmt, new_temp);
5270 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5271
5272 if (slp_node)
5273 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5274 else
5275 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5276 }
5277 break;
5278
5279 case WIDEN:
5280 /* In case the vectorization factor (VF) is bigger than the number
5281 of elements that we can fit in a vectype (nunits), we have to
5282 generate more than one vector stmt - i.e., we need to "unroll"
5283 the vector stmt by a factor VF/nunits. */
5284 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5285 op0, &vec_oprnds0,
5286 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5287 &vec_oprnds1);
5288 if (code == WIDEN_LSHIFT_EXPR)
5289 {
5290 int oprnds_size = vec_oprnds0.length ();
5291 vec_oprnds1.create (oprnds_size);
5292 for (i = 0; i < oprnds_size; ++i)
5293 vec_oprnds1.quick_push (op1);
5294 }
5295 /* Arguments are ready. Create the new vector stmts. */
5296 for (i = multi_step_cvt; i >= 0; i--)
5297 {
5298 tree this_dest = vec_dsts[i];
5299 enum tree_code c1 = code1, c2 = code2;
5300 if (i == 0 && codecvt2 != ERROR_MARK)
5301 {
5302 c1 = codecvt1;
5303 c2 = codecvt2;
5304 }
5305 if (known_eq (nunits_out, nunits_in))
5306 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5307 &vec_oprnds1, stmt_info,
5308 this_dest, gsi,
5309 c1, op_type);
5310 else
5311 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5312 &vec_oprnds1, stmt_info,
5313 this_dest, gsi,
5314 c1, c2, op_type);
5315 }
5316
5317 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5318 {
5319 gimple *new_stmt;
5320 if (cvt_type)
5321 {
5322 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5323 new_temp = make_ssa_name (vec_dest);
5324 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5325 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5326 }
5327 else
5328 new_stmt = SSA_NAME_DEF_STMT (vop0);
5329
5330 if (slp_node)
5331 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5332 else
5333 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5334 }
5335 break;
5336
5337 case NARROW:
5338 /* In case the vectorization factor (VF) is bigger than the number
5339 of elements that we can fit in a vectype (nunits), we have to
5340 generate more than one vector stmt - i.e., we need to "unroll"
5341 the vector stmt by a factor VF/nunits. */
5342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5343 op0, &vec_oprnds0);
5344 /* Arguments are ready. Create the new vector stmts. */
5345 if (cvt_type)
5346 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5347 {
5348 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5349 new_temp = make_ssa_name (vec_dest);
5350 gassign *new_stmt
5351 = gimple_build_assign (new_temp, codecvt1, vop0);
5352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5353 vec_oprnds0[i] = new_temp;
5354 }
5355
5356 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5357 multi_step_cvt,
5358 stmt_info, vec_dsts, gsi,
5359 slp_node, code1);
5360 break;
5361 }
5362 if (!slp_node)
5363 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5364
5365 vec_oprnds0.release ();
5366 vec_oprnds1.release ();
5367 interm_types.release ();
5368
5369 return true;
5370 }
5371
5372 /* Return true if we can assume from the scalar form of STMT_INFO that
5373 neither the scalar nor the vector forms will generate code. STMT_INFO
5374 is known not to involve a data reference. */
5375
5376 bool
5377 vect_nop_conversion_p (stmt_vec_info stmt_info)
5378 {
5379 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5380 if (!stmt)
5381 return false;
5382
5383 tree lhs = gimple_assign_lhs (stmt);
5384 tree_code code = gimple_assign_rhs_code (stmt);
5385 tree rhs = gimple_assign_rhs1 (stmt);
5386
5387 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5388 return true;
5389
5390 if (CONVERT_EXPR_CODE_P (code))
5391 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5392
5393 return false;
5394 }
5395
5396 /* Function vectorizable_assignment.
5397
5398 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5399 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5400 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5401 Return true if STMT_INFO is vectorizable in this way. */
5402
5403 static bool
5404 vectorizable_assignment (vec_info *vinfo,
5405 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5406 gimple **vec_stmt, slp_tree slp_node,
5407 stmt_vector_for_cost *cost_vec)
5408 {
5409 tree vec_dest;
5410 tree scalar_dest;
5411 tree op;
5412 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5413 tree new_temp;
5414 enum vect_def_type dt[1] = {vect_unknown_def_type};
5415 int ndts = 1;
5416 int ncopies;
5417 int i;
5418 vec<tree> vec_oprnds = vNULL;
5419 tree vop;
5420 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5421 enum tree_code code;
5422 tree vectype_in;
5423
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5425 return false;
5426
5427 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5428 && ! vec_stmt)
5429 return false;
5430
5431 /* Is this a vectorizable assignment? */
5432 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5433 if (!stmt)
5434 return false;
5435
5436 scalar_dest = gimple_assign_lhs (stmt);
5437 if (TREE_CODE (scalar_dest) != SSA_NAME)
5438 return false;
5439
5440 if (STMT_VINFO_DATA_REF (stmt_info))
5441 return false;
5442
5443 code = gimple_assign_rhs_code (stmt);
5444 if (!(gimple_assign_single_p (stmt)
5445 || code == PAREN_EXPR
5446 || CONVERT_EXPR_CODE_P (code)))
5447 return false;
5448
5449 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5450 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5451
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5454 case of SLP. */
5455 if (slp_node)
5456 ncopies = 1;
5457 else
5458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5459
5460 gcc_assert (ncopies >= 1);
5461
5462 slp_tree slp_op;
5463 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5464 &dt[0], &vectype_in))
5465 {
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5468 "use not simple.\n");
5469 return false;
5470 }
5471 if (!vectype_in)
5472 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5473
5474 /* We can handle NOP_EXPR conversions that do not change the number
5475 of elements or the vector size. */
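/* For example, a signed <-> unsigned NOP_EXPR of the same precision, or a
   VIEW_CONVERT_EXPR between an integer and a float of the same size; both
   are emitted as a single vector VIEW_CONVERT_EXPR below.  */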
5476 if ((CONVERT_EXPR_CODE_P (code)
5477 || code == VIEW_CONVERT_EXPR)
5478 && (!vectype_in
5479 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5480 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5481 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5482 return false;
5483
5484 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5485 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5486 {
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5489 "can't convert between boolean and non "
5490 "boolean vectors %T\n", TREE_TYPE (op));
5491
5492 return false;
5493 }
5494
5495 /* We do not handle bit-precision changes. */
5496 if ((CONVERT_EXPR_CODE_P (code)
5497 || code == VIEW_CONVERT_EXPR)
5498 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5499 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5500 || !type_has_mode_precision_p (TREE_TYPE (op)))
5501 /* But a conversion that does not change the bit-pattern is ok. */
5502 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5503 > TYPE_PRECISION (TREE_TYPE (op)))
5504 && TYPE_UNSIGNED (TREE_TYPE (op))))
5505 {
5506 if (dump_enabled_p ())
5507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5508 "type conversion to/from bit-precision "
5509 "unsupported.\n");
5510 return false;
5511 }
5512
5513 if (!vec_stmt) /* transformation not required. */
5514 {
5515 if (slp_node
5516 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5517 {
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "incompatible vector types for invariants\n");
5521 return false;
5522 }
5523 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5524 DUMP_VECT_SCOPE ("vectorizable_assignment");
5525 if (!vect_nop_conversion_p (stmt_info))
5526 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5527 cost_vec);
5528 return true;
5529 }
5530
5531 /* Transform. */
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5534
5535 /* Handle def. */
5536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5537
5538 /* Handle use. */
5539 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5540
5541 /* Arguments are ready. Create the new vector stmt. */
5542 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5543 {
5544 if (CONVERT_EXPR_CODE_P (code)
5545 || code == VIEW_CONVERT_EXPR)
5546 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5547 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5548 new_temp = make_ssa_name (vec_dest, new_stmt);
5549 gimple_assign_set_lhs (new_stmt, new_temp);
5550 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5551 if (slp_node)
5552 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5553 else
5554 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5555 }
5556 if (!slp_node)
5557 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5558
5559 vec_oprnds.release ();
5560 return true;
5561 }
5562
5563
5564 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5565 either as shift by a scalar or by a vector. */
5566
5567 bool
5568 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5569 {
5570
5571 machine_mode vec_mode;
5572 optab optab;
5573 int icode;
5574 tree vectype;
5575
5576 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5577 if (!vectype)
5578 return false;
5579
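/* Prefer a vector-shifted-by-scalar optab; if the target does not
   provide one, fall back to vector-shifted-by-vector.  */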
5580 optab = optab_for_tree_code (code, vectype, optab_scalar);
5581 if (!optab
5582 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5583 {
5584 optab = optab_for_tree_code (code, vectype, optab_vector);
5585 if (!optab
5586 || (optab_handler (optab, TYPE_MODE (vectype))
5587 == CODE_FOR_nothing))
5588 return false;
5589 }
5590
5591 vec_mode = TYPE_MODE (vectype);
5592 icode = (int) optab_handler (optab, vec_mode);
5593 if (icode == CODE_FOR_nothing)
5594 return false;
5595
5596 return true;
5597 }
5598
5599
5600 /* Function vectorizable_shift.
5601
5602 Check if STMT_INFO performs a shift operation that can be vectorized.
5603 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5605 Return true if STMT_INFO is vectorizable in this way. */
5606
5607 static bool
5608 vectorizable_shift (vec_info *vinfo,
5609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5610 gimple **vec_stmt, slp_tree slp_node,
5611 stmt_vector_for_cost *cost_vec)
5612 {
5613 tree vec_dest;
5614 tree scalar_dest;
5615 tree op0, op1 = NULL;
5616 tree vec_oprnd1 = NULL_TREE;
5617 tree vectype;
5618 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5619 enum tree_code code;
5620 machine_mode vec_mode;
5621 tree new_temp;
5622 optab optab;
5623 int icode;
5624 machine_mode optab_op2_mode;
5625 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5626 int ndts = 2;
5627 poly_uint64 nunits_in;
5628 poly_uint64 nunits_out;
5629 tree vectype_out;
5630 tree op1_vectype;
5631 int ncopies;
5632 int i;
5633 vec<tree> vec_oprnds0 = vNULL;
5634 vec<tree> vec_oprnds1 = vNULL;
5635 tree vop0, vop1;
5636 unsigned int k;
5637 bool scalar_shift_arg = true;
5638 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5639 bool incompatible_op1_vectype_p = false;
5640
5641 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5642 return false;
5643
5644 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5645 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5646 && ! vec_stmt)
5647 return false;
5648
5649 /* Is STMT a vectorizable shift/rotate operation? */
5650 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5651 if (!stmt)
5652 return false;
5653
5654 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5655 return false;
5656
5657 code = gimple_assign_rhs_code (stmt);
5658
5659 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5660 || code == RROTATE_EXPR))
5661 return false;
5662
5663 scalar_dest = gimple_assign_lhs (stmt);
5664 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5665 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5666 {
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "bit-precision shifts not supported.\n");
5670 return false;
5671 }
5672
5673 slp_tree slp_op0;
5674 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5675 0, &op0, &slp_op0, &dt[0], &vectype))
5676 {
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "use not simple.\n");
5680 return false;
5681 }
5682 /* If op0 is an external or constant def, infer the vector type
5683 from the scalar type. */
5684 if (!vectype)
5685 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5686 if (vec_stmt)
5687 gcc_assert (vectype);
5688 if (!vectype)
5689 {
5690 if (dump_enabled_p ())
5691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5692 "no vectype for scalar type\n");
5693 return false;
5694 }
5695
5696 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5697 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5698 if (maybe_ne (nunits_out, nunits_in))
5699 return false;
5700
5701 stmt_vec_info op1_def_stmt_info;
5702 slp_tree slp_op1;
5703 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5704 &dt[1], &op1_vectype, &op1_def_stmt_info))
5705 {
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5708 "use not simple.\n");
5709 return false;
5710 }
5711
5712 /* Multiple types in SLP are handled by creating the appropriate number of
5713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5714 case of SLP. */
5715 if (slp_node)
5716 ncopies = 1;
5717 else
5718 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5719
5720 gcc_assert (ncopies >= 1);
5721
5722 /* Determine whether the shift amount is a vector or a scalar. If the
5723 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5724
5725 if ((dt[1] == vect_internal_def
5726 || dt[1] == vect_induction_def
5727 || dt[1] == vect_nested_cycle)
5728 && !slp_node)
5729 scalar_shift_arg = false;
5730 else if (dt[1] == vect_constant_def
5731 || dt[1] == vect_external_def
5732 || dt[1] == vect_internal_def)
5733 {
5734 /* In SLP, we need to check whether the shift count is the same in
5735 all scalar stmts; in loops, a constant or invariant shift count is
5736 always a scalar shift. */
5737 if (slp_node)
5738 {
5739 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5740 stmt_vec_info slpstmt_info;
5741
5742 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5743 {
5744 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5745 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5746 scalar_shift_arg = false;
5747 }
5748
5749 /* For internal SLP defs we have to make sure we see scalar stmts
5750 for all vector elements.
5751 ??? For different vectors we could resort to a different
5752 scalar shift operand but code-generation below simply always
5753 takes the first. */
5754 if (dt[1] == vect_internal_def
5755 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5756 stmts.length ()))
5757 scalar_shift_arg = false;
5758 }
5759
5760 /* If the shift amount is computed by a pattern stmt we cannot
5761 use the scalar amount directly thus give up and use a vector
5762 shift. */
5763 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5764 scalar_shift_arg = false;
5765 }
5766 else
5767 {
5768 if (dump_enabled_p ())
5769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5770 "operand mode requires invariant argument.\n");
5771 return false;
5772 }
5773
5774 /* Vector shifted by vector. */
5775 bool was_scalar_shift_arg = scalar_shift_arg;
5776 if (!scalar_shift_arg)
5777 {
5778 optab = optab_for_tree_code (code, vectype, optab_vector);
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE, vect_location,
5781 "vector/vector shift/rotate found.\n");
5782
5783 if (!op1_vectype)
5784 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5785 slp_op1);
5786 incompatible_op1_vectype_p
5787 = (op1_vectype == NULL_TREE
5788 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5789 TYPE_VECTOR_SUBPARTS (vectype))
5790 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5791 if (incompatible_op1_vectype_p
5792 && (!slp_node
5793 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5794 || slp_op1->refcnt != 1))
5795 {
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798 "unusable type for last operand in"
5799 " vector/vector shift/rotate.\n");
5800 return false;
5801 }
5802 }
5803 /* See if the machine has a vector shifted by scalar insn and if not
5804 then see if it has a vector shifted by vector insn. */
5805 else
5806 {
5807 optab = optab_for_tree_code (code, vectype, optab_scalar);
5808 if (optab
5809 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5810 {
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "vector/scalar shift/rotate found.\n");
5814 }
5815 else
5816 {
5817 optab = optab_for_tree_code (code, vectype, optab_vector);
5818 if (optab
5819 && (optab_handler (optab, TYPE_MODE (vectype))
5820 != CODE_FOR_nothing))
5821 {
5822 scalar_shift_arg = false;
5823
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE, vect_location,
5826 "vector/vector shift/rotate found.\n");
5827
5828 if (!op1_vectype)
5829 op1_vectype = get_vectype_for_scalar_type (vinfo,
5830 TREE_TYPE (op1),
5831 slp_op1);
5832
5833 /* Unlike the other binary operators, shifts/rotates have
5834 the rhs being int, instead of the same type as the lhs,
5835 so make sure the scalar is the right type if we are
5836 dealing with vectors of long long/long/short/char. */
5837 incompatible_op1_vectype_p
5838 = (!op1_vectype
5839 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5840 TREE_TYPE (op1)));
5841 if (incompatible_op1_vectype_p
5842 && dt[1] == vect_internal_def)
5843 {
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "unusable type for last operand in"
5847 " vector/vector shift/rotate.\n");
5848 return false;
5849 }
5850 }
5851 }
5852 }
5853
5854 /* Supportable by target? */
5855 if (!optab)
5856 {
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5859 "no optab.\n");
5860 return false;
5861 }
5862 vec_mode = TYPE_MODE (vectype);
5863 icode = (int) optab_handler (optab, vec_mode);
5864 if (icode == CODE_FOR_nothing)
5865 {
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5868 "op not supported by target.\n");
5869 return false;
5870 }
5871 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5872 if (vect_emulated_vector_p (vectype))
5873 return false;
5874
5875 if (!vec_stmt) /* transformation not required. */
5876 {
5877 if (slp_node
5878 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5879 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5880 && (!incompatible_op1_vectype_p
5881 || dt[1] == vect_constant_def)
5882 && !vect_maybe_update_slp_op_vectype
5883 (slp_op1,
5884 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5885 {
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "incompatible vector types for invariants\n");
5889 return false;
5890 }
5891 /* Now adjust the constant shift amount in place. */
5892 if (slp_node
5893 && incompatible_op1_vectype_p
5894 && dt[1] == vect_constant_def)
5895 {
5896 for (unsigned i = 0;
5897 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5898 {
5899 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5900 = fold_convert (TREE_TYPE (vectype),
5901 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5902 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5903 == INTEGER_CST));
5904 }
5905 }
5906 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5907 DUMP_VECT_SCOPE ("vectorizable_shift");
5908 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5909 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5910 return true;
5911 }
5912
5913 /* Transform. */
5914
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_NOTE, vect_location,
5917 "transform binary/unary operation.\n");
5918
5919 if (incompatible_op1_vectype_p && !slp_node)
5920 {
5921 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5922 op1 = fold_convert (TREE_TYPE (vectype), op1);
5923 if (dt[1] != vect_constant_def)
5924 op1 = vect_init_vector (vinfo, stmt_info, op1,
5925 TREE_TYPE (vectype), NULL);
5926 }
5927
5928 /* Handle def. */
5929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5930
5931 if (scalar_shift_arg && dt[1] != vect_internal_def)
5932 {
5933 /* Vector shl and shr insn patterns can be defined with scalar
5934 operand 2 (shift operand). In this case, use constant or loop
5935 invariant op1 directly, without extending it to vector mode
5936 first. */
5937 optab_op2_mode = insn_data[icode].operand[2].mode;
5938 if (!VECTOR_MODE_P (optab_op2_mode))
5939 {
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "operand 1 using scalar mode.\n");
5943 vec_oprnd1 = op1;
5944 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5945 vec_oprnds1.quick_push (vec_oprnd1);
5946 /* Store vec_oprnd1 for every vector stmt to be created.
5947 We check during the analysis that all the shift arguments
5948 are the same.
5949 TODO: Allow different constants for different vector
5950 stmts generated for an SLP instance. */
5951 for (k = 0;
5952 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5953 vec_oprnds1.quick_push (vec_oprnd1);
5954 }
5955 }
5956 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5957 {
5958 if (was_scalar_shift_arg)
5959 {
5960 /* If the argument was the same in all lanes, create
5961 the correctly typed vector shift amount directly. */
5962 op1 = fold_convert (TREE_TYPE (vectype), op1);
5963 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5964 !loop_vinfo ? gsi : NULL);
5965 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5966 !loop_vinfo ? gsi : NULL);
5967 vec_oprnds1.create (slp_node->vec_stmts_size);
5968 for (k = 0; k < slp_node->vec_stmts_size; k++)
5969 vec_oprnds1.quick_push (vec_oprnd1);
5970 }
5971 else if (dt[1] == vect_constant_def)
5972 /* The constant shift amount has been adjusted in place. */
5973 ;
5974 else
5975 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5976 }
5977
5978 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5979 (a special case for certain kinds of vector shifts); otherwise,
5980 operand 1 should be of a vector type (the usual case). */
5981 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5982 op0, &vec_oprnds0,
5983 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5984
5985 /* Arguments are ready. Create the new vector stmt. */
5986 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5987 {
5988 /* For internal defs where we need to use a scalar shift arg,
5989 extract the first lane. */
5990 if (scalar_shift_arg && dt[1] == vect_internal_def)
5991 {
5992 vop1 = vec_oprnds1[0];
5993 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5994 gassign *new_stmt
5995 = gimple_build_assign (new_temp,
5996 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5997 vop1,
5998 TYPE_SIZE (TREE_TYPE (new_temp)),
5999 bitsize_zero_node));
6000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6001 vop1 = new_temp;
6002 }
6003 else
6004 vop1 = vec_oprnds1[i];
6005 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6006 new_temp = make_ssa_name (vec_dest, new_stmt);
6007 gimple_assign_set_lhs (new_stmt, new_temp);
6008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6009 if (slp_node)
6010 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6011 else
6012 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6013 }
6014
6015 if (!slp_node)
6016 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6017
6018 vec_oprnds0.release ();
6019 vec_oprnds1.release ();
6020
6021 return true;
6022 }
6023
6024
6025 /* Function vectorizable_operation.
6026
6027 Check if STMT_INFO performs a binary, unary or ternary operation that can
6028 be vectorized.
6029 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6030 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6031 Return true if STMT_INFO is vectorizable in this way. */
6032
6033 static bool
6034 vectorizable_operation (vec_info *vinfo,
6035 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6036 gimple **vec_stmt, slp_tree slp_node,
6037 stmt_vector_for_cost *cost_vec)
6038 {
6039 tree vec_dest;
6040 tree scalar_dest;
6041 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6042 tree vectype;
6043 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6044 enum tree_code code, orig_code;
6045 machine_mode vec_mode;
6046 tree new_temp;
6047 int op_type;
6048 optab optab;
6049 bool target_support_p;
6050 enum vect_def_type dt[3]
6051 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6052 int ndts = 3;
6053 poly_uint64 nunits_in;
6054 poly_uint64 nunits_out;
6055 tree vectype_out;
6056 int ncopies, vec_num;
6057 int i;
6058 vec<tree> vec_oprnds0 = vNULL;
6059 vec<tree> vec_oprnds1 = vNULL;
6060 vec<tree> vec_oprnds2 = vNULL;
6061 tree vop0, vop1, vop2;
6062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6063
6064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6065 return false;
6066
6067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6068 && ! vec_stmt)
6069 return false;
6070
6071 /* Is STMT a vectorizable binary/unary/ternary operation? */
6072 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6073 if (!stmt)
6074 return false;
6075
6076 /* Loads and stores are handled in vectorizable_{load,store}. */
6077 if (STMT_VINFO_DATA_REF (stmt_info))
6078 return false;
6079
6080 orig_code = code = gimple_assign_rhs_code (stmt);
6081
6082 /* Shifts are handled in vectorizable_shift. */
6083 if (code == LSHIFT_EXPR
6084 || code == RSHIFT_EXPR
6085 || code == LROTATE_EXPR
6086 || code == RROTATE_EXPR)
6087 return false;
6088
6089 /* Comparisons are handled in vectorizable_comparison. */
6090 if (TREE_CODE_CLASS (code) == tcc_comparison)
6091 return false;
6092
6093 /* Conditions are handled in vectorizable_condition. */
6094 if (code == COND_EXPR)
6095 return false;
6096
6097 /* For pointer addition and subtraction, we should use the normal
6098 plus and minus for the vector operation. */
6099 if (code == POINTER_PLUS_EXPR)
6100 code = PLUS_EXPR;
6101 if (code == POINTER_DIFF_EXPR)
6102 code = MINUS_EXPR;
6103
6104 /* Support only unary, binary or ternary operations. */
6105 op_type = TREE_CODE_LENGTH (code);
6106 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6107 {
6108 if (dump_enabled_p ())
6109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6110 "num. args = %d (not unary/binary/ternary op).\n",
6111 op_type);
6112 return false;
6113 }
6114
6115 scalar_dest = gimple_assign_lhs (stmt);
6116 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6117
6118 /* Most operations cannot handle bit-precision types without extra
6119 truncations. */
6120 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6121 if (!mask_op_p
6122 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6123 /* Exceptions are bitwise binary operations. */
6124 && code != BIT_IOR_EXPR
6125 && code != BIT_XOR_EXPR
6126 && code != BIT_AND_EXPR)
6127 {
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "bit-precision arithmetic not supported.\n");
6131 return false;
6132 }
6133
6134 slp_tree slp_op0;
6135 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6136 0, &op0, &slp_op0, &dt[0], &vectype))
6137 {
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6140 "use not simple.\n");
6141 return false;
6142 }
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
6145 if (!vectype)
6146 {
6147 /* For a boolean type we cannot determine the vectype from an
6148 invariant value (we don't know whether it is a vector
6149 of booleans or a vector of integers). Use the output
6150 vectype because operations on booleans don't change
6151 the type. */
6152 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6153 {
6154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6155 {
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 "not supported operation on bool value.\n");
6159 return false;
6160 }
6161 vectype = vectype_out;
6162 }
6163 else
6164 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6165 slp_node);
6166 }
6167 if (vec_stmt)
6168 gcc_assert (vectype);
6169 if (!vectype)
6170 {
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6173 "no vectype for scalar type %T\n",
6174 TREE_TYPE (op0));
6175
6176 return false;
6177 }
6178
6179 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6180 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6181 if (maybe_ne (nunits_out, nunits_in))
6182 return false;
6183
6184 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6185 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6186 if (op_type == binary_op || op_type == ternary_op)
6187 {
6188 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6189 1, &op1, &slp_op1, &dt[1], &vectype2))
6190 {
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6193 "use not simple.\n");
6194 return false;
6195 }
6196 if (vectype2
6197 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6198 return false;
6199 }
6200 if (op_type == ternary_op)
6201 {
6202 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6203 2, &op2, &slp_op2, &dt[2], &vectype3))
6204 {
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "use not simple.\n");
6208 return false;
6209 }
6210 if (vectype3
6211 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6212 return false;
6213 }
6214
6215 /* Multiple types in SLP are handled by creating the appropriate number of
6216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6217 case of SLP. */
6218 if (slp_node)
6219 {
6220 ncopies = 1;
6221 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6222 }
6223 else
6224 {
6225 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6226 vec_num = 1;
6227 }
6228
6229 gcc_assert (ncopies >= 1);
6230
6231 /* Reject attempts to combine mask types with nonmask types, e.g. if
6232 we have an AND between a (nonmask) boolean loaded from memory and
6233 a (mask) boolean result of a comparison.
6234
6235 TODO: We could easily fix these cases up using pattern statements. */
6236 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6237 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6238 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6239 {
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6242 "mixed mask and nonmask vector types\n");
6243 return false;
6244 }
6245
6246 /* Supportable by target? */
6247
6248 vec_mode = TYPE_MODE (vectype);
6249 if (code == MULT_HIGHPART_EXPR)
6250 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6251 else
6252 {
6253 optab = optab_for_tree_code (code, vectype, optab_default);
6254 if (!optab)
6255 {
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6258 "no optab.\n");
6259 return false;
6260 }
6261 target_support_p = (optab_handler (optab, vec_mode)
6262 != CODE_FOR_nothing);
6263 }
6264
6265 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6266 if (!target_support_p)
6267 {
6268 if (dump_enabled_p ())
6269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6270 "op not supported by target.\n");
6271 /* Check only during analysis. */
6272 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6273 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6274 return false;
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_NOTE, vect_location,
6277 "proceeding using word mode.\n");
6278 using_emulated_vectors_p = true;
6279 }
6280
6281 if (using_emulated_vectors_p
6282 && !vect_can_vectorize_without_simd_p (code))
6283 {
6284 if (dump_enabled_p ())
6285 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6286 return false;
6287 }
6288
6289 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6290 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6291 internal_fn cond_fn = get_conditional_internal_fn (code);
6292
6293 if (!vec_stmt) /* transformation not required. */
6294 {
6295 /* If this operation is part of a reduction, a fully-masked loop
6296 should only change the active lanes of the reduction chain,
6297 keeping the inactive lanes as-is. */
6298 if (loop_vinfo
6299 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6300 && reduc_idx >= 0)
6301 {
6302 if (cond_fn == IFN_LAST
6303 || !direct_internal_fn_supported_p (cond_fn, vectype,
6304 OPTIMIZE_FOR_SPEED))
6305 {
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6308 "can't use a fully-masked loop because no"
6309 " conditional operation is available.\n");
6310 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6311 }
6312 else
6313 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6314 vectype, NULL);
6315 }
6316
6317 /* Put types on constant and invariant SLP children. */
6318 if (slp_node
6319 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6320 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6321 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6322 {
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6325 "incompatible vector types for invariants\n");
6326 return false;
6327 }
6328
6329 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6330 DUMP_VECT_SCOPE ("vectorizable_operation");
6331 vect_model_simple_cost (vinfo, stmt_info,
6332 ncopies, dt, ndts, slp_node, cost_vec);
6333 if (using_emulated_vectors_p)
6334 {
6335 /* The above vect_model_simple_cost call handles constants
6336 in the prologue and (mis-)costs one of the stmts as
6337 a vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6338 for the actual lowering that will be applied. */
6339 unsigned n
6340 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6341 switch (code)
6342 {
6343 case PLUS_EXPR:
6344 n *= 5;
6345 break;
6346 case MINUS_EXPR:
6347 n *= 6;
6348 break;
6349 case NEGATE_EXPR:
6350 n *= 4;
6351 break;
6352 default:;
6353 }
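/* N now approximates the number of scalar stmts the generic lowering
   will emit for these vector stmts, so cost them as scalar stmts.  */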
6354 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6355 }
6356 return true;
6357 }
6358
6359 /* Transform. */
6360
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_NOTE, vect_location,
6363 "transform binary/unary operation.\n");
6364
6365 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6366
6367 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6368 vectors with unsigned elements, but the result is signed. So, we
6369 need to compute the MINUS_EXPR into a vectype temporary and
6370 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6371 tree vec_cvt_dest = NULL_TREE;
6372 if (orig_code == POINTER_DIFF_EXPR)
6373 {
6374 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6375 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6376 }
6377 /* Handle def. */
6378 else
6379 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6380
6381 /* In case the vectorization factor (VF) is bigger than the number
6382 of elements that we can fit in a vectype (nunits), we have to generate
6383 more than one vector stmt - i.e., we need to "unroll" the
6384 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6385 from one copy of the vector stmt to the next, in the field
6386 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6387 stages to find the correct vector defs to be used when vectorizing
6388 stmts that use the defs of the current stmt. The example below
6389 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6390 we need to create 4 vectorized stmts):
6391
6392 before vectorization:
6393 RELATED_STMT VEC_STMT
6394 S1: x = memref - -
6395 S2: z = x + 1 - -
6396
6397 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6398 there):
6399 RELATED_STMT VEC_STMT
6400 VS1_0: vx0 = memref0 VS1_1 -
6401 VS1_1: vx1 = memref1 VS1_2 -
6402 VS1_2: vx2 = memref2 VS1_3 -
6403 VS1_3: vx3 = memref3 - -
6404 S1: x = load - VS1_0
6405 S2: z = x + 1 - -
6406
6407 step2: vectorize stmt S2 (done here):
6408 To vectorize stmt S2 we first need to find the relevant vector
6409 def for the first operand 'x'. This is, as usual, obtained from
6410 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6411 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6412 relevant vector def 'vx0'. Having found 'vx0' we can generate
6413 the vector stmt VS2_0, and as usual, record it in the
6414 STMT_VINFO_VEC_STMT of stmt S2.
6415 When creating the second copy (VS2_1), we obtain the relevant vector
6416 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6417 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6418 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6419 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6420 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6421 chain of stmts and pointers:
6422 RELATED_STMT VEC_STMT
6423 VS1_0: vx0 = memref0 VS1_1 -
6424 VS1_1: vx1 = memref1 VS1_2 -
6425 VS1_2: vx2 = memref2 VS1_3 -
6426 VS1_3: vx3 = memref3 - -
6427 S1: x = load - VS1_0
6428 VS2_0: vz0 = vx0 + v1 VS2_1 -
6429 VS2_1: vz1 = vx1 + v1 VS2_2 -
6430 VS2_2: vz2 = vx2 + v1 VS2_3 -
6431 VS2_3: vz3 = vx3 + v1 - -
6432 S2: z = x + 1 - VS2_0 */
6433
6434 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6435 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6436 /* Arguments are ready. Create the new vector stmt. */
6437 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6438 {
6439 gimple *new_stmt = NULL;
6440 vop1 = ((op_type == binary_op || op_type == ternary_op)
6441 ? vec_oprnds1[i] : NULL_TREE);
6442 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6443 if (masked_loop_p && reduc_idx >= 0)
6444 {
6445 /* Perform the operation on active elements only and take
6446 inactive elements from the reduction chain input. */
6447 gcc_assert (!vop2);
6448 vop2 = reduc_idx == 1 ? vop1 : vop0;
6449 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6450 vectype, i);
6451 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6452 vop0, vop1, vop2);
6453 new_temp = make_ssa_name (vec_dest, call);
6454 gimple_call_set_lhs (call, new_temp);
6455 gimple_call_set_nothrow (call, true);
6456 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6457 new_stmt = call;
6458 }
6459 else
6460 {
6461 tree mask = NULL_TREE;
6462 /* When combining two masks, check whether either of them is elsewhere
6463 combined with a loop mask; if so, the new combined mask does not
6464 need to be combined with a loop mask again. */
6465 if (masked_loop_p
6466 && code == BIT_AND_EXPR
6467 && VECTOR_BOOLEAN_TYPE_P (vectype))
6468 {
6469 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6470 ncopies}))
6471 {
6472 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6473 vectype, i);
6474
6475 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6476 vop0, gsi);
6477 }
6478
6479 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6480 ncopies }))
6481 {
6482 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6483 vectype, i);
6484
6485 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6486 vop1, gsi);
6487 }
6488 }
6489
6490 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6491 new_temp = make_ssa_name (vec_dest, new_stmt);
6492 gimple_assign_set_lhs (new_stmt, new_temp);
6493 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6494 if (using_emulated_vectors_p)
6495 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6496
6497 /* Enter the combined value into the vector cond hash so we don't
6498 AND it with a loop mask again. */
6499 if (mask)
6500 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6501
6502 if (vec_cvt_dest)
6503 {
6504 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6505 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6506 new_temp);
6507 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6508 gimple_assign_set_lhs (new_stmt, new_temp);
6509 vect_finish_stmt_generation (vinfo, stmt_info,
6510 new_stmt, gsi);
6511 }
6512 }
6513 if (slp_node)
6514 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6515 else
6516 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6517 }
6518
6519 if (!slp_node)
6520 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6521
6522 vec_oprnds0.release ();
6523 vec_oprnds1.release ();
6524 vec_oprnds2.release ();
6525
6526 return true;
6527 }
6528
6529 /* A helper function to ensure data reference DR_INFO's base alignment. */
6530
6531 static void
6532 ensure_base_align (dr_vec_info *dr_info)
6533 {
6534 /* Alignment is only analyzed for the first element of a DR group;
6535 use that element to determine the base alignment we need to enforce. */
6536 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6537 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6538
6539 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6540
6541 if (dr_info->base_misaligned)
6542 {
6543 tree base_decl = dr_info->base_decl;
6544
6545 /* We should only be able to increase the alignment of a base object
6546 if we know what its new alignment should be at compile time. */
6547 unsigned HOST_WIDE_INT align_base_to =
6548 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6549
6550 if (decl_in_symtab_p (base_decl))
6551 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6552 else if (DECL_ALIGN (base_decl) < align_base_to)
6553 {
6554 SET_DECL_ALIGN (base_decl, align_base_to);
6555 DECL_USER_ALIGN (base_decl) = 1;
6556 }
6557 dr_info->base_misaligned = false;
6558 }
6559 }
6560
6561
6562 /* Function get_group_alias_ptr_type.
6563
6564 Return the alias type for the group starting at FIRST_STMT_INFO. */
6565
6566 static tree
6567 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6568 {
6569 struct data_reference *first_dr, *next_dr;
6570
6571 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6572 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6573 while (next_stmt_info)
6574 {
6575 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6576 if (get_alias_set (DR_REF (first_dr))
6577 != get_alias_set (DR_REF (next_dr)))
6578 {
6579 if (dump_enabled_p ())
6580 dump_printf_loc (MSG_NOTE, vect_location,
6581 "conflicting alias set types.\n");
6582 return ptr_type_node;
6583 }
6584 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6585 }
6586 return reference_alias_ptr_type (DR_REF (first_dr));
6587 }
6588
6589
6590 /* Function scan_operand_equal_p.
6591
6592 Helper function for check_scan_store. Compare two references
6593 with .GOMP_SIMD_LANE bases. */
6594
6595 static bool
6596 scan_operand_equal_p (tree ref1, tree ref2)
6597 {
6598 tree ref[2] = { ref1, ref2 };
6599 poly_int64 bitsize[2], bitpos[2];
6600 tree offset[2], base[2];
6601 for (int i = 0; i < 2; ++i)
6602 {
6603 machine_mode mode;
6604 int unsignedp, reversep, volatilep = 0;
6605 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6606 &offset[i], &mode, &unsignedp,
6607 &reversep, &volatilep);
6608 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6609 return false;
6610 if (TREE_CODE (base[i]) == MEM_REF
6611 && offset[i] == NULL_TREE
6612 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6613 {
6614 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6615 if (is_gimple_assign (def_stmt)
6616 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6617 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6618 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6619 {
6620 if (maybe_ne (mem_ref_offset (base[i]), 0))
6621 return false;
6622 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6623 offset[i] = gimple_assign_rhs2 (def_stmt);
6624 }
6625 }
6626 }
6627
6628 if (!operand_equal_p (base[0], base[1], 0))
6629 return false;
6630 if (maybe_ne (bitsize[0], bitsize[1]))
6631 return false;
6632 if (offset[0] != offset[1])
6633 {
6634 if (!offset[0] || !offset[1])
6635 return false;
6636 if (!operand_equal_p (offset[0], offset[1], 0))
6637 {
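/* Offsets that are not literally equal may still be equivalent: strip a
   multiplication by a constant step and a widening conversion from each
   offset and compare the stripped offsets and steps instead.  */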
6638 tree step[2];
6639 for (int i = 0; i < 2; ++i)
6640 {
6641 step[i] = integer_one_node;
6642 if (TREE_CODE (offset[i]) == SSA_NAME)
6643 {
6644 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6645 if (is_gimple_assign (def_stmt)
6646 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6647 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6648 == INTEGER_CST))
6649 {
6650 step[i] = gimple_assign_rhs2 (def_stmt);
6651 offset[i] = gimple_assign_rhs1 (def_stmt);
6652 }
6653 }
6654 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6655 {
6656 step[i] = TREE_OPERAND (offset[i], 1);
6657 offset[i] = TREE_OPERAND (offset[i], 0);
6658 }
6659 tree rhs1 = NULL_TREE;
6660 if (TREE_CODE (offset[i]) == SSA_NAME)
6661 {
6662 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6663 if (gimple_assign_cast_p (def_stmt))
6664 rhs1 = gimple_assign_rhs1 (def_stmt);
6665 }
6666 else if (CONVERT_EXPR_P (offset[i]))
6667 rhs1 = TREE_OPERAND (offset[i], 0);
6668 if (rhs1
6669 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6670 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6671 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6672 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6673 offset[i] = rhs1;
6674 }
6675 if (!operand_equal_p (offset[0], offset[1], 0)
6676 || !operand_equal_p (step[0], step[1], 0))
6677 return false;
6678 }
6679 }
6680 return true;
6681 }
6682
6683
6684 enum scan_store_kind {
6685 /* Normal permutation. */
6686 scan_store_kind_perm,
6687
6688 /* Whole vector left shift permutation with zero init. */
6689 scan_store_kind_lshift_zero,
6690
6691 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6692 scan_store_kind_lshift_cond
6693 };
6694
6695 /* Function scan_store_can_perm_p.
6696
6697 Verify if we can perform the needed permutations or whole vector shifts.
6698 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6699 USE_WHOLE_VECTOR, if nonnull, is filled with one enum scan_store_kind
6700 per step describing which operation to perform at that step. */
6701
6702 static int
6703 scan_store_can_perm_p (tree vectype, tree init,
6704 vec<enum scan_store_kind> *use_whole_vector = NULL)
6705 {
6706 enum machine_mode vec_mode = TYPE_MODE (vectype);
6707 unsigned HOST_WIDE_INT nunits;
6708 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6709 return -1;
6710 int units_log2 = exact_log2 (nunits);
6711 if (units_log2 <= 0)
6712 return -1;
6713
6714 int i;
6715 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
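/* Each step I below needs a permutation that selects the first 2^I lanes
   from the first input and the remaining lanes from the second input; the
   final step needs a broadcast of the last lane.  Where a permutation is
   unavailable, fall back to a whole-vector shift, combined with a
   VEC_COND_EXPR when INIT is not an all-zero constant.  */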
6716 for (i = 0; i <= units_log2; ++i)
6717 {
6718 unsigned HOST_WIDE_INT j, k;
6719 enum scan_store_kind kind = scan_store_kind_perm;
6720 vec_perm_builder sel (nunits, nunits, 1);
6721 sel.quick_grow (nunits);
6722 if (i == units_log2)
6723 {
6724 for (j = 0; j < nunits; ++j)
6725 sel[j] = nunits - 1;
6726 }
6727 else
6728 {
6729 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6730 sel[j] = j;
6731 for (k = 0; j < nunits; ++j, ++k)
6732 sel[j] = nunits + k;
6733 }
6734 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6735 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6736 {
6737 if (i == units_log2)
6738 return -1;
6739
6740 if (whole_vector_shift_kind == scan_store_kind_perm)
6741 {
6742 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6743 return -1;
6744 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6745 /* Whole vector shifts shift in zeros, so if INIT is an all-zero
6746 constant, there is no need to do anything further. */
6747 if ((TREE_CODE (init) != INTEGER_CST
6748 && TREE_CODE (init) != REAL_CST)
6749 || !initializer_zerop (init))
6750 {
6751 tree masktype = truth_type_for (vectype);
6752 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6753 return -1;
6754 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6755 }
6756 }
6757 kind = whole_vector_shift_kind;
6758 }
6759 if (use_whole_vector)
6760 {
6761 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6762 use_whole_vector->safe_grow_cleared (i, true);
6763 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6764 use_whole_vector->safe_push (kind);
6765 }
6766 }
6767
6768 return units_log2;
6769 }
6770
6771
6772 /* Function check_scan_store.
6773
6774 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6775
6776 static bool
6777 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6778 enum vect_def_type rhs_dt, bool slp, tree mask,
6779 vect_memory_access_type memory_access_type)
6780 {
6781 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6782 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6783 tree ref_type;
6784
6785 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6786 if (slp
6787 || mask
6788 || memory_access_type != VMAT_CONTIGUOUS
6789 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6790 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6791 || loop_vinfo == NULL
6792 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6793 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6794 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6795 || !integer_zerop (DR_INIT (dr_info->dr))
6796 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6797 || !alias_sets_conflict_p (get_alias_set (vectype),
6798 get_alias_set (TREE_TYPE (ref_type))))
6799 {
6800 if (dump_enabled_p ())
6801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6802 "unsupported OpenMP scan store.\n");
6803 return false;
6804 }
6805
6806   /* We need to pattern match code built by OpenMP lowering and simplified
6807      by subsequent optimizations into something we can handle.
6808 #pragma omp simd reduction(inscan,+:r)
6809 for (...)
6810 {
6811 r += something ();
6812 #pragma omp scan inclusive (r)
6813 use (r);
6814 }
6815 shall have body with:
6816 // Initialization for input phase, store the reduction initializer:
6817 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6818 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6819 D.2042[_21] = 0;
6820 // Actual input phase:
6821 ...
6822 r.0_5 = D.2042[_20];
6823 _6 = _4 + r.0_5;
6824 D.2042[_20] = _6;
6825 // Initialization for scan phase:
6826 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6827 _26 = D.2043[_25];
6828 _27 = D.2042[_25];
6829 _28 = _26 + _27;
6830 D.2043[_25] = _28;
6831 D.2042[_25] = _28;
6832 // Actual scan phase:
6833 ...
6834 r.1_8 = D.2042[_20];
6835 ...
6836      The "omp simd array" variable D.2042 holds the privatized copy used
6837      inside the loop and D.2043 is another one that holds copies of
6838      the current original list item.  The separate GOMP_SIMD_LANE ifn
6839      kinds are there in order to allow optimizing the initializer store
6840      and combiner sequence, e.g. if it is originally some C++-ish
6841      user-defined reduction, while still allowing the vectorizer to
6842      pattern recognize it and turn it into the appropriate vectorized scan.
6843
6844 For exclusive scan, this is slightly different:
6845 #pragma omp simd reduction(inscan,+:r)
6846 for (...)
6847 {
6848 use (r);
6849 #pragma omp scan exclusive (r)
6850 r += something ();
6851 }
6852 shall have body with:
6853 // Initialization for input phase, store the reduction initializer:
6854 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6855 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6856 D.2042[_21] = 0;
6857 // Actual input phase:
6858 ...
6859 r.0_5 = D.2042[_20];
6860 _6 = _4 + r.0_5;
6861 D.2042[_20] = _6;
6862 // Initialization for scan phase:
6863 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6864 _26 = D.2043[_25];
6865 D.2044[_25] = _26;
6866 _27 = D.2042[_25];
6867 _28 = _26 + _27;
6868 D.2043[_25] = _28;
6869 // Actual scan phase:
6870 ...
6871 r.1_8 = D.2044[_20];
6872 ... */
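  /* As the examples above suggest, the branches below distinguish these
     accesses via STMT_VINFO_SIMD_LANE_ACCESS_P: 2 for the initializer
     store (the .GOMP_SIMD_LANE (..., 1) lane), 3 for the inclusive scan
     accesses (the (..., 2) lane) and 4 for the exclusive scan accesses
     (the (..., 3) lane).  */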
6873
6874 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6875 {
6876 /* Match the D.2042[_21] = 0; store above. Just require that
6877 it is a constant or external definition store. */
6878 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6879 {
6880 fail_init:
6881 if (dump_enabled_p ())
6882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6883 "unsupported OpenMP scan initializer store.\n");
6884 return false;
6885 }
6886
6887 if (! loop_vinfo->scan_map)
6888 loop_vinfo->scan_map = new hash_map<tree, tree>;
6889 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6890 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6891 if (cached)
6892 goto fail_init;
6893 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6894
6895 /* These stores can be vectorized normally. */
6896 return true;
6897 }
6898
6899 if (rhs_dt != vect_internal_def)
6900 {
6901 fail:
6902 if (dump_enabled_p ())
6903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6904 "unsupported OpenMP scan combiner pattern.\n");
6905 return false;
6906 }
6907
6908 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6909 tree rhs = gimple_assign_rhs1 (stmt);
6910 if (TREE_CODE (rhs) != SSA_NAME)
6911 goto fail;
6912
6913 gimple *other_store_stmt = NULL;
6914 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6915 bool inscan_var_store
6916 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6917
6918 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6919 {
6920 if (!inscan_var_store)
6921 {
6922 use_operand_p use_p;
6923 imm_use_iterator iter;
6924 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6925 {
6926 gimple *use_stmt = USE_STMT (use_p);
6927 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6928 continue;
6929 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6930 || !is_gimple_assign (use_stmt)
6931 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6932 || other_store_stmt
6933 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6934 goto fail;
6935 other_store_stmt = use_stmt;
6936 }
6937 if (other_store_stmt == NULL)
6938 goto fail;
6939 rhs = gimple_assign_lhs (other_store_stmt);
6940 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6941 goto fail;
6942 }
6943 }
6944 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6945 {
6946 use_operand_p use_p;
6947 imm_use_iterator iter;
6948 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6949 {
6950 gimple *use_stmt = USE_STMT (use_p);
6951 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6952 continue;
6953 if (other_store_stmt)
6954 goto fail;
6955 other_store_stmt = use_stmt;
6956 }
6957 }
6958 else
6959 goto fail;
6960
6961 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6962 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6963 || !is_gimple_assign (def_stmt)
6964 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6965 goto fail;
6966
6967 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6968 /* For pointer addition, we should use the normal plus for the vector
6969 operation. */
6970 switch (code)
6971 {
6972 case POINTER_PLUS_EXPR:
6973 code = PLUS_EXPR;
6974 break;
6975 case MULT_HIGHPART_EXPR:
6976 goto fail;
6977 default:
6978 break;
6979 }
6980 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6981 goto fail;
6982
6983 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6984 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6985 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6986 goto fail;
6987
6988 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6989 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6990 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6991 || !gimple_assign_load_p (load1_stmt)
6992 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6993 || !gimple_assign_load_p (load2_stmt))
6994 goto fail;
6995
6996 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6997 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6998 if (load1_stmt_info == NULL
6999 || load2_stmt_info == NULL
7000 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7001 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7002 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7003 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7004 goto fail;
7005
7006 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7007 {
7008 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7009 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7010 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7011 goto fail;
7012 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7013 tree lrhs;
7014 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7015 lrhs = rhs1;
7016 else
7017 lrhs = rhs2;
7018 use_operand_p use_p;
7019 imm_use_iterator iter;
7020 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7021 {
7022 gimple *use_stmt = USE_STMT (use_p);
7023 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7024 continue;
7025 if (other_store_stmt)
7026 goto fail;
7027 other_store_stmt = use_stmt;
7028 }
7029 }
7030
7031 if (other_store_stmt == NULL)
7032 goto fail;
7033 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7034 || !gimple_store_p (other_store_stmt))
7035 goto fail;
7036
7037 stmt_vec_info other_store_stmt_info
7038 = loop_vinfo->lookup_stmt (other_store_stmt);
7039 if (other_store_stmt_info == NULL
7040 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7041 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7042 goto fail;
7043
7044 gimple *stmt1 = stmt;
7045 gimple *stmt2 = other_store_stmt;
7046 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7047 std::swap (stmt1, stmt2);
7048 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7049 gimple_assign_rhs1 (load2_stmt)))
7050 {
7051 std::swap (rhs1, rhs2);
7052 std::swap (load1_stmt, load2_stmt);
7053 std::swap (load1_stmt_info, load2_stmt_info);
7054 }
7055 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7056 gimple_assign_rhs1 (load1_stmt)))
7057 goto fail;
7058
7059 tree var3 = NULL_TREE;
7060 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7061 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7062 gimple_assign_rhs1 (load2_stmt)))
7063 goto fail;
7064 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7065 {
7066 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7067 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7068 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7069 goto fail;
7070 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7071 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7072 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7073 || lookup_attribute ("omp simd inscan exclusive",
7074 DECL_ATTRIBUTES (var3)))
7075 goto fail;
7076 }
7077
7078 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7079 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7080 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7081 goto fail;
7082
7083 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7084 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7085 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7086 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7087 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7088 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7089 goto fail;
7090
7091 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7092 std::swap (var1, var2);
7093
7094 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7095 {
7096 if (!lookup_attribute ("omp simd inscan exclusive",
7097 DECL_ATTRIBUTES (var1)))
7098 goto fail;
7099 var1 = var3;
7100 }
7101
7102 if (loop_vinfo->scan_map == NULL)
7103 goto fail;
7104 tree *init = loop_vinfo->scan_map->get (var1);
7105 if (init == NULL)
7106 goto fail;
7107
7108 /* The IL is as expected, now check if we can actually vectorize it.
7109 Inclusive scan:
7110 _26 = D.2043[_25];
7111 _27 = D.2042[_25];
7112 _28 = _26 + _27;
7113 D.2043[_25] = _28;
7114 D.2042[_25] = _28;
7115 should be vectorized as (where _40 is the vectorized rhs
7116 from the D.2042[_21] = 0; store):
7117 _30 = MEM <vector(8) int> [(int *)&D.2043];
7118 _31 = MEM <vector(8) int> [(int *)&D.2042];
7119 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7120 _33 = _31 + _32;
7121 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7122 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7123 _35 = _33 + _34;
7124 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7125 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7126 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7127 _37 = _35 + _36;
7128 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7129 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7130 _38 = _30 + _37;
7131 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7132 MEM <vector(8) int> [(int *)&D.2043] = _39;
7133 MEM <vector(8) int> [(int *)&D.2042] = _38;
7134 Exclusive scan:
7135 _26 = D.2043[_25];
7136 D.2044[_25] = _26;
7137 _27 = D.2042[_25];
7138 _28 = _26 + _27;
7139 D.2043[_25] = _28;
7140 should be vectorized as (where _40 is the vectorized rhs
7141 from the D.2042[_21] = 0; store):
7142 _30 = MEM <vector(8) int> [(int *)&D.2043];
7143 _31 = MEM <vector(8) int> [(int *)&D.2042];
7144 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7145 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7146 _34 = _32 + _33;
7147 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7148 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7149 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7150 _36 = _34 + _35;
7151 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7152 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7153 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7154 _38 = _36 + _37;
7155 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7156 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7157 _39 = _30 + _38;
7158 _50 = _31 + _39;
7159 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7160 MEM <vector(8) int> [(int *)&D.2044] = _39;
7161 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7162 enum machine_mode vec_mode = TYPE_MODE (vectype);
7163 optab optab = optab_for_tree_code (code, vectype, optab_default);
7164 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7165 goto fail;
7166
7167 int units_log2 = scan_store_can_perm_p (vectype, *init);
7168 if (units_log2 == -1)
7169 goto fail;
7170
7171 return true;
7172 }
7173
7174
7175 /* Function vectorizable_scan_store.
7176
7177    Helper of vectorizable_store, with arguments as in vectorizable_store.
7178 Handle only the transformation, checking is done in check_scan_store. */
7179
7180 static bool
7181 vectorizable_scan_store (vec_info *vinfo,
7182 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7183 gimple **vec_stmt, int ncopies)
7184 {
7185 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7186 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7187 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7188 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7189
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_NOTE, vect_location,
7192 "transform scan store. ncopies = %d\n", ncopies);
7193
7194 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7195 tree rhs = gimple_assign_rhs1 (stmt);
7196 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7197
7198 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7199 bool inscan_var_store
7200 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7201
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7203 {
7204 use_operand_p use_p;
7205 imm_use_iterator iter;
7206 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7207 {
7208 gimple *use_stmt = USE_STMT (use_p);
7209 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7210 continue;
7211 rhs = gimple_assign_lhs (use_stmt);
7212 break;
7213 }
7214 }
7215
7216 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7217 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7218 if (code == POINTER_PLUS_EXPR)
7219 code = PLUS_EXPR;
7220 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7221 && commutative_tree_code (code));
7222 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7223 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7224 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7225 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7226 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7227 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7228 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7229 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7230 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7231 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7232 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7233
7234 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7235 {
7236 std::swap (rhs1, rhs2);
7237 std::swap (var1, var2);
7238 std::swap (load1_dr_info, load2_dr_info);
7239 }
7240
7241 tree *init = loop_vinfo->scan_map->get (var1);
7242 gcc_assert (init);
7243
7244 unsigned HOST_WIDE_INT nunits;
7245 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7246 gcc_unreachable ();
7247 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7248 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7249 gcc_assert (units_log2 > 0);
7250 auto_vec<tree, 16> perms;
7251 perms.quick_grow (units_log2 + 1);
7252 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
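  /* Build the permutation masks used below: for step I < UNITS_LOG2 the
     mask shifts the running scan vector up by 1 << I lanes, filling the
     low lanes from the vectorized initializer (or from a zero vector when
     a whole-vector shift is used instead); the final mask broadcasts the
     last lane.  */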
7253 for (int i = 0; i <= units_log2; ++i)
7254 {
7255 unsigned HOST_WIDE_INT j, k;
7256 vec_perm_builder sel (nunits, nunits, 1);
7257 sel.quick_grow (nunits);
7258 if (i == units_log2)
7259 for (j = 0; j < nunits; ++j)
7260 sel[j] = nunits - 1;
7261 else
7262 {
7263 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7264 sel[j] = j;
7265 for (k = 0; j < nunits; ++j, ++k)
7266 sel[j] = nunits + k;
7267 }
7268 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7269 if (!use_whole_vector.is_empty ()
7270 && use_whole_vector[i] != scan_store_kind_perm)
7271 {
7272 if (zero_vec == NULL_TREE)
7273 zero_vec = build_zero_cst (vectype);
7274 if (masktype == NULL_TREE
7275 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7276 masktype = truth_type_for (vectype);
7277 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7278 }
7279 else
7280 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7281 }
7282
7283 tree vec_oprnd1 = NULL_TREE;
7284 tree vec_oprnd2 = NULL_TREE;
7285 tree vec_oprnd3 = NULL_TREE;
7286 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7287 tree dataref_offset = build_int_cst (ref_type, 0);
7288 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7289 vectype, VMAT_CONTIGUOUS);
7290 tree ldataref_ptr = NULL_TREE;
7291 tree orig = NULL_TREE;
7292 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7293 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7294 auto_vec<tree> vec_oprnds1;
7295 auto_vec<tree> vec_oprnds2;
7296 auto_vec<tree> vec_oprnds3;
7297 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7298 *init, &vec_oprnds1,
7299 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7300 rhs2, &vec_oprnds3);
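  /* Emit the scan one copy at a time: UNITS_LOG2 shift-and-combine steps
     build the in-vector prefix (with one extra leading shift for exclusive
     scan), the running total carried in ORIG is combined in with CODE, and
     the last lane of the inclusive result is broadcast into ORIG so that
     it carries over into the next copy.  See the example in
     check_scan_store above.  */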
7301 for (int j = 0; j < ncopies; j++)
7302 {
7303 vec_oprnd1 = vec_oprnds1[j];
7304 if (ldataref_ptr == NULL)
7305 vec_oprnd2 = vec_oprnds2[j];
7306 vec_oprnd3 = vec_oprnds3[j];
7307 if (j == 0)
7308 orig = vec_oprnd3;
7309 else if (!inscan_var_store)
7310 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7311
7312 if (ldataref_ptr)
7313 {
7314 vec_oprnd2 = make_ssa_name (vectype);
7315 tree data_ref = fold_build2 (MEM_REF, vectype,
7316 unshare_expr (ldataref_ptr),
7317 dataref_offset);
7318 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7319 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7320 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7321 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7322 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7323 }
7324
7325 tree v = vec_oprnd2;
7326 for (int i = 0; i < units_log2; ++i)
7327 {
7328 tree new_temp = make_ssa_name (vectype);
7329 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7330 (zero_vec
7331 && (use_whole_vector[i]
7332 != scan_store_kind_perm))
7333 ? zero_vec : vec_oprnd1, v,
7334 perms[i]);
7335 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7336 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7337 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7338
7339 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7340 {
7341 	      /* The whole vector shift shifted in zeros, but if *init
7342 		 is not initializer_zerop, we need to replace those elements
7343 		 with elements from vec_oprnd1.  */
7344 tree_vector_builder vb (masktype, nunits, 1);
7345 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7346 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7347 ? boolean_false_node : boolean_true_node);
7348
7349 tree new_temp2 = make_ssa_name (vectype);
7350 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7351 new_temp, vec_oprnd1);
7352 vect_finish_stmt_generation (vinfo, stmt_info,
7353 g, gsi);
7354 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7355 new_temp = new_temp2;
7356 }
7357
7358 /* For exclusive scan, perform the perms[i] permutation once
7359 more. */
7360 if (i == 0
7361 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7362 && v == vec_oprnd2)
7363 {
7364 v = new_temp;
7365 --i;
7366 continue;
7367 }
7368
7369 tree new_temp2 = make_ssa_name (vectype);
7370 g = gimple_build_assign (new_temp2, code, v, new_temp);
7371 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7372 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7373
7374 v = new_temp2;
7375 }
7376
7377 tree new_temp = make_ssa_name (vectype);
7378 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7379 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7380 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7381
7382 tree last_perm_arg = new_temp;
7383       /* For exclusive scan, new_temp computed above is the exclusive scan
7384 	 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7385 	 of the last element into orig.  */
7386 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7387 {
7388 last_perm_arg = make_ssa_name (vectype);
7389 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7390 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7391 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7392 }
7393
7394 orig = make_ssa_name (vectype);
7395 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7396 last_perm_arg, perms[units_log2]);
7397 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7398 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7399
7400 if (!inscan_var_store)
7401 {
7402 tree data_ref = fold_build2 (MEM_REF, vectype,
7403 unshare_expr (dataref_ptr),
7404 dataref_offset);
7405 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7406 g = gimple_build_assign (data_ref, new_temp);
7407 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7408 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7409 }
7410 }
7411
7412 if (inscan_var_store)
7413 for (int j = 0; j < ncopies; j++)
7414 {
7415 if (j != 0)
7416 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7417
7418 tree data_ref = fold_build2 (MEM_REF, vectype,
7419 unshare_expr (dataref_ptr),
7420 dataref_offset);
7421 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7422 gimple *g = gimple_build_assign (data_ref, orig);
7423 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7424 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7425 }
7426 return true;
7427 }
7428
7429
7430 /* Function vectorizable_store.
7431
7432 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7433 that can be vectorized.
7434 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7435 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7436 Return true if STMT_INFO is vectorizable in this way. */
7437
7438 static bool
7439 vectorizable_store (vec_info *vinfo,
7440 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7441 gimple **vec_stmt, slp_tree slp_node,
7442 stmt_vector_for_cost *cost_vec)
7443 {
7444 tree data_ref;
7445 tree op;
7446 tree vec_oprnd = NULL_TREE;
7447 tree elem_type;
7448 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7449 class loop *loop = NULL;
7450 machine_mode vec_mode;
7451 tree dummy;
7452 enum vect_def_type rhs_dt = vect_unknown_def_type;
7453 enum vect_def_type mask_dt = vect_unknown_def_type;
7454 tree dataref_ptr = NULL_TREE;
7455 tree dataref_offset = NULL_TREE;
7456 gimple *ptr_incr = NULL;
7457 int ncopies;
7458 int j;
7459 stmt_vec_info first_stmt_info;
7460 bool grouped_store;
7461 unsigned int group_size, i;
7462 vec<tree> oprnds = vNULL;
7463 vec<tree> result_chain = vNULL;
7464 vec<tree> vec_oprnds = vNULL;
7465 bool slp = (slp_node != NULL);
7466 unsigned int vec_num;
7467 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7468 tree aggr_type;
7469 gather_scatter_info gs_info;
7470 poly_uint64 vf;
7471 vec_load_store_type vls_type;
7472 tree ref_type;
7473
7474 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7475 return false;
7476
7477 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7478 && ! vec_stmt)
7479 return false;
7480
7481 /* Is vectorizable store? */
7482
7483 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7484 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7485 {
7486 tree scalar_dest = gimple_assign_lhs (assign);
7487 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7488 && is_pattern_stmt_p (stmt_info))
7489 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7490 if (TREE_CODE (scalar_dest) != ARRAY_REF
7491 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7492 && TREE_CODE (scalar_dest) != INDIRECT_REF
7493 && TREE_CODE (scalar_dest) != COMPONENT_REF
7494 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7495 && TREE_CODE (scalar_dest) != REALPART_EXPR
7496 && TREE_CODE (scalar_dest) != MEM_REF)
7497 return false;
7498 }
7499 else
7500 {
7501 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7502 if (!call || !gimple_call_internal_p (call))
7503 return false;
7504
7505 internal_fn ifn = gimple_call_internal_fn (call);
7506 if (!internal_store_fn_p (ifn))
7507 return false;
7508
7509 if (slp_node != NULL)
7510 {
7511 if (dump_enabled_p ())
7512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7513 "SLP of masked stores not supported.\n");
7514 return false;
7515 }
7516
7517 int mask_index = internal_fn_mask_index (ifn);
7518 if (mask_index >= 0
7519 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7520 &mask, NULL, &mask_dt, &mask_vectype))
7521 return false;
7522 }
7523
7524 op = vect_get_store_rhs (stmt_info);
7525
7526 /* Cannot have hybrid store SLP -- that would mean storing to the
7527 same location twice. */
7528 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7529
7530 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7531 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7532
7533 if (loop_vinfo)
7534 {
7535 loop = LOOP_VINFO_LOOP (loop_vinfo);
7536 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7537 }
7538 else
7539 vf = 1;
7540
7541 /* Multiple types in SLP are handled by creating the appropriate number of
7542 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7543 case of SLP. */
7544 if (slp)
7545 ncopies = 1;
7546 else
7547 ncopies = vect_get_num_copies (loop_vinfo, vectype);
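  /* For example, with a vectorization factor of 8 and a 4-lane vectype
     this gives NCOPIES == 2, i.e. two vector stores per scalar store.  */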
7548
7549 gcc_assert (ncopies >= 1);
7550
7551 /* FORNOW. This restriction should be relaxed. */
7552 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7553 {
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7556 "multiple types in nested loop.\n");
7557 return false;
7558 }
7559
7560 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7561 op, &rhs_dt, &rhs_vectype, &vls_type))
7562 return false;
7563
7564 elem_type = TREE_TYPE (vectype);
7565 vec_mode = TYPE_MODE (vectype);
7566
7567 if (!STMT_VINFO_DATA_REF (stmt_info))
7568 return false;
7569
7570 vect_memory_access_type memory_access_type;
7571 enum dr_alignment_support alignment_support_scheme;
7572 int misalignment;
7573 poly_int64 poffset;
7574 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7575 ncopies, &memory_access_type, &poffset,
7576 &alignment_support_scheme, &misalignment, &gs_info))
7577 return false;
7578
7579 if (mask)
7580 {
7581 if (memory_access_type == VMAT_CONTIGUOUS)
7582 {
7583 if (!VECTOR_MODE_P (vec_mode)
7584 || !can_vec_mask_load_store_p (vec_mode,
7585 TYPE_MODE (mask_vectype), false))
7586 return false;
7587 }
7588 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7589 && (memory_access_type != VMAT_GATHER_SCATTER
7590 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7591 {
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "unsupported access type for masked store.\n");
7595 return false;
7596 }
7597 }
7598 else
7599 {
7600       /* FORNOW.  In some cases we can vectorize even if the data type is
7601 	 not supported (e.g. array initialization with 0).  */
7602 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7603 return false;
7604 }
7605
7606 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7607 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7608 && memory_access_type != VMAT_GATHER_SCATTER
7609 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7610 if (grouped_store)
7611 {
7612 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7613 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7614 group_size = DR_GROUP_SIZE (first_stmt_info);
7615 }
7616 else
7617 {
7618 first_stmt_info = stmt_info;
7619 first_dr_info = dr_info;
7620 group_size = vec_num = 1;
7621 }
7622
7623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7624 {
7625 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7626 memory_access_type))
7627 return false;
7628 }
7629
7630 if (!vec_stmt) /* transformation not required. */
7631 {
7632 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7633
7634 if (loop_vinfo
7635 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7636 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7637 vls_type, group_size,
7638 memory_access_type, &gs_info,
7639 mask);
7640
7641 if (slp_node
7642 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7643 vectype))
7644 {
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7647 "incompatible vector types for invariants\n");
7648 return false;
7649 }
7650
7651 if (dump_enabled_p ()
7652 && memory_access_type != VMAT_ELEMENTWISE
7653 && memory_access_type != VMAT_GATHER_SCATTER
7654 && alignment_support_scheme != dr_aligned)
7655 dump_printf_loc (MSG_NOTE, vect_location,
7656 "Vectorizing an unaligned access.\n");
7657
7658 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7659 vect_model_store_cost (vinfo, stmt_info, ncopies,
7660 memory_access_type, alignment_support_scheme,
7661 misalignment, vls_type, slp_node, cost_vec);
7662 return true;
7663 }
7664 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7665
7666 /* Transform. */
7667
7668 ensure_base_align (dr_info);
7669
7670 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7671 {
7672 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7673 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7674 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7675 tree ptr, var, scale, vec_mask;
7676 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7677 tree mask_halfvectype = mask_vectype;
7678 edge pe = loop_preheader_edge (loop);
7679 gimple_seq seq;
7680 basic_block new_bb;
7681 enum { NARROW, NONE, WIDEN } modifier;
7682 poly_uint64 scatter_off_nunits
7683 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7684
7685 if (known_eq (nunits, scatter_off_nunits))
7686 modifier = NONE;
7687 else if (known_eq (nunits * 2, scatter_off_nunits))
7688 {
7689 modifier = WIDEN;
7690
7691 /* Currently gathers and scatters are only supported for
7692 fixed-length vectors. */
7693 unsigned int count = scatter_off_nunits.to_constant ();
7694 vec_perm_builder sel (count, count, 1);
7695 for (i = 0; i < (unsigned int) count; ++i)
7696 sel.quick_push (i | (count / 2));
7697
7698 vec_perm_indices indices (sel, 1, count);
7699 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7700 indices);
7701 gcc_assert (perm_mask != NULL_TREE);
7702 }
7703 else if (known_eq (nunits, scatter_off_nunits * 2))
7704 {
7705 modifier = NARROW;
7706
7707 /* Currently gathers and scatters are only supported for
7708 fixed-length vectors. */
7709 unsigned int count = nunits.to_constant ();
7710 vec_perm_builder sel (count, count, 1);
7711 for (i = 0; i < (unsigned int) count; ++i)
7712 sel.quick_push (i | (count / 2));
7713
7714 vec_perm_indices indices (sel, 2, count);
7715 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7716 gcc_assert (perm_mask != NULL_TREE);
7717 ncopies *= 2;
7718
7719 if (mask)
7720 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7721 }
7722 else
7723 gcc_unreachable ();
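      /* With WIDEN a single offset vector feeds two scatter calls: even
	 copies use it as is, odd copies first permute its high half down.
	 With NARROW the data (and mask) vector is the wider one, so NCOPIES
	 is doubled and odd copies permute the high half of the data down
	 (and unpack the matching half of the mask).  */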
7724
7725 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7726 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7727 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7728 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7729 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7730 scaletype = TREE_VALUE (arglist);
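      /* The types recovered above assume the scatter builtin has a
	 prototype of the form
	   void fn (PTRTYPE base, MASKTYPE mask, IDXTYPE idx, SRCTYPE src,
		    SCALETYPE scale);
	 which is what the target-provided scatter built-ins (e.g. on x86)
	 look like; the call below passes its arguments in that order.  */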
7731
7732 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7733 && TREE_CODE (rettype) == VOID_TYPE);
7734
7735 ptr = fold_convert (ptrtype, gs_info.base);
7736 if (!is_gimple_min_invariant (ptr))
7737 {
7738 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7739 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7740 gcc_assert (!new_bb);
7741 }
7742
7743 if (mask == NULL_TREE)
7744 {
7745 mask_arg = build_int_cst (masktype, -1);
7746 mask_arg = vect_init_vector (vinfo, stmt_info,
7747 mask_arg, masktype, NULL);
7748 }
7749
7750 scale = build_int_cst (scaletype, gs_info.scale);
7751
7752 auto_vec<tree> vec_oprnds0;
7753 auto_vec<tree> vec_oprnds1;
7754 auto_vec<tree> vec_masks;
7755 if (mask)
7756 {
7757 tree mask_vectype = truth_type_for (vectype);
7758 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7759 modifier == NARROW
7760 ? ncopies / 2 : ncopies,
7761 mask, &vec_masks, mask_vectype);
7762 }
7763 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7764 modifier == WIDEN
7765 ? ncopies / 2 : ncopies,
7766 gs_info.offset, &vec_oprnds0);
7767 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7768 modifier == NARROW
7769 ? ncopies / 2 : ncopies,
7770 op, &vec_oprnds1);
7771 for (j = 0; j < ncopies; ++j)
7772 {
7773 if (modifier == WIDEN)
7774 {
7775 if (j & 1)
7776 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7777 perm_mask, stmt_info, gsi);
7778 else
7779 op = vec_oprnd0 = vec_oprnds0[j / 2];
7780 src = vec_oprnd1 = vec_oprnds1[j];
7781 if (mask)
7782 mask_op = vec_mask = vec_masks[j];
7783 }
7784 else if (modifier == NARROW)
7785 {
7786 if (j & 1)
7787 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7788 perm_mask, stmt_info, gsi);
7789 else
7790 src = vec_oprnd1 = vec_oprnds1[j / 2];
7791 op = vec_oprnd0 = vec_oprnds0[j];
7792 if (mask)
7793 mask_op = vec_mask = vec_masks[j / 2];
7794 }
7795 else
7796 {
7797 op = vec_oprnd0 = vec_oprnds0[j];
7798 src = vec_oprnd1 = vec_oprnds1[j];
7799 if (mask)
7800 mask_op = vec_mask = vec_masks[j];
7801 }
7802
7803 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7804 {
7805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7806 TYPE_VECTOR_SUBPARTS (srctype)));
7807 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7808 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7809 gassign *new_stmt
7810 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7812 src = var;
7813 }
7814
7815 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7816 {
7817 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7818 TYPE_VECTOR_SUBPARTS (idxtype)));
7819 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7820 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7821 gassign *new_stmt
7822 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7824 op = var;
7825 }
7826
7827 if (mask)
7828 {
7829 tree utype;
7830 mask_arg = mask_op;
7831 if (modifier == NARROW)
7832 {
7833 var = vect_get_new_ssa_name (mask_halfvectype,
7834 vect_simple_var);
7835 gassign *new_stmt
7836 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7837 : VEC_UNPACK_LO_EXPR,
7838 mask_op);
7839 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7840 mask_arg = var;
7841 }
7842 tree optype = TREE_TYPE (mask_arg);
7843 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7844 utype = masktype;
7845 else
7846 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7847 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7848 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7849 gassign *new_stmt
7850 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7851 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7852 mask_arg = var;
7853 if (!useless_type_conversion_p (masktype, utype))
7854 {
7855 gcc_assert (TYPE_PRECISION (utype)
7856 <= TYPE_PRECISION (masktype));
7857 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7858 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7859 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7860 mask_arg = var;
7861 }
7862 }
7863
7864 gcall *new_stmt
7865 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7867
7868 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7869 }
7870 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7871 return true;
7872 }
7873 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7874 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7875
7876 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7877 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7878
7879 if (grouped_store)
7880 {
7881 /* FORNOW */
7882 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7883
7884 /* We vectorize all the stmts of the interleaving group when we
7885 reach the last stmt in the group. */
7886 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7887 < DR_GROUP_SIZE (first_stmt_info)
7888 && !slp)
7889 {
7890 *vec_stmt = NULL;
7891 return true;
7892 }
7893
7894 if (slp)
7895 {
7896 grouped_store = false;
7897 /* VEC_NUM is the number of vect stmts to be created for this
7898 group. */
7899 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7900 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7901 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7902 == first_stmt_info);
7903 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7904 op = vect_get_store_rhs (first_stmt_info);
7905 }
7906 else
7907 /* VEC_NUM is the number of vect stmts to be created for this
7908 group. */
7909 vec_num = group_size;
7910
7911 ref_type = get_group_alias_ptr_type (first_stmt_info);
7912 }
7913 else
7914 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7915
7916 if (dump_enabled_p ())
7917 dump_printf_loc (MSG_NOTE, vect_location,
7918 "transform store. ncopies = %d\n", ncopies);
7919
7920 if (memory_access_type == VMAT_ELEMENTWISE
7921 || memory_access_type == VMAT_STRIDED_SLP)
7922 {
7923 gimple_stmt_iterator incr_gsi;
7924 bool insert_after;
7925 gimple *incr;
7926 tree offvar;
7927 tree ivstep;
7928 tree running_off;
7929 tree stride_base, stride_step, alias_off;
7930 tree vec_oprnd;
7931 tree dr_offset;
7932 unsigned int g;
7933 /* Checked by get_load_store_type. */
7934 unsigned int const_nunits = nunits.to_constant ();
7935
7936 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7937 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7938
7939 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7940 stride_base
7941 = fold_build_pointer_plus
7942 (DR_BASE_ADDRESS (first_dr_info->dr),
7943 size_binop (PLUS_EXPR,
7944 convert_to_ptrofftype (dr_offset),
7945 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7946 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7947
7948 /* For a store with loop-invariant (but other than power-of-2)
7949 stride (i.e. not a grouped access) like so:
7950
7951 for (i = 0; i < n; i += stride)
7952 array[i] = ...;
7953
7954 we generate a new induction variable and new stores from
7955 the components of the (vectorized) rhs:
7956
7957 for (j = 0; ; j += VF*stride)
7958 vectemp = ...;
7959 tmp1 = vectemp[0];
7960 array[j] = tmp1;
7961 tmp2 = vectemp[1];
7962 array[j + stride] = tmp2;
7963 ...
7964 */
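      /* In the code below OFFVAR is the "j" induction variable from the
	 sketch above (advancing by VF * stride bytes per vector iteration),
	 RUNNING_OFF is the per-element store address derived from it, and
	 the individual components are extracted from the (possibly punned)
	 vector operand with BIT_FIELD_REFs.  */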
7965
7966 unsigned nstores = const_nunits;
7967 unsigned lnel = 1;
7968 tree ltype = elem_type;
7969 tree lvectype = vectype;
7970 if (slp)
7971 {
7972 if (group_size < const_nunits
7973 && const_nunits % group_size == 0)
7974 {
7975 nstores = const_nunits / group_size;
7976 lnel = group_size;
7977 ltype = build_vector_type (elem_type, group_size);
7978 lvectype = vectype;
7979
7980 	      /* First check whether the target lacks a vec_extract pattern
7981 		 for extracting the vector elts directly.  */
7982 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7983 machine_mode vmode;
7984 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7985 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7986 group_size).exists (&vmode)
7987 || (convert_optab_handler (vec_extract_optab,
7988 TYPE_MODE (vectype), vmode)
7989 == CODE_FOR_nothing))
7990 {
7991 /* Try to avoid emitting an extract of vector elements
7992 by performing the extracts using an integer type of the
7993 same size, extracting from a vector of those and then
7994 re-interpreting it as the original vector type if
7995 supported. */
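		  /* For instance (illustrative only): with GROUP_SIZE == 2
		     and a V8SF vector, the preferred form extracts V2SF
		     sub-vectors directly; if the target lacks that
		     vec_extract pattern, this fallback extracts DImode
		     elements from a V4DI view of the same vector.  */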
7996 unsigned lsize
7997 = group_size * GET_MODE_BITSIZE (elmode);
7998 unsigned int lnunits = const_nunits / group_size;
7999 		  /* If we can't construct such a vector, fall back to
8000 		     element extracts from the original vector type and
8001 		     element size stores.  */
8002 if (int_mode_for_size (lsize, 0).exists (&elmode)
8003 && VECTOR_MODE_P (TYPE_MODE (vectype))
8004 && related_vector_mode (TYPE_MODE (vectype), elmode,
8005 lnunits).exists (&vmode)
8006 && (convert_optab_handler (vec_extract_optab,
8007 vmode, elmode)
8008 != CODE_FOR_nothing))
8009 {
8010 nstores = lnunits;
8011 lnel = group_size;
8012 ltype = build_nonstandard_integer_type (lsize, 1);
8013 lvectype = build_vector_type (ltype, nstores);
8014 }
8015 /* Else fall back to vector extraction anyway.
8016 Fewer stores are more important than avoiding spilling
8017 of the vector we extract from. Compared to the
8018 construction case in vectorizable_load no store-forwarding
8019 issue exists here for reasonable archs. */
8020 }
8021 }
8022 else if (group_size >= const_nunits
8023 && group_size % const_nunits == 0)
8024 {
8025 nstores = 1;
8026 lnel = const_nunits;
8027 ltype = vectype;
8028 lvectype = vectype;
8029 }
8030 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8031 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8032 }
8033
8034 ivstep = stride_step;
8035 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8036 build_int_cst (TREE_TYPE (ivstep), vf));
8037
8038 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8039
8040 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8041 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8042 create_iv (stride_base, ivstep, NULL,
8043 loop, &incr_gsi, insert_after,
8044 &offvar, NULL);
8045 incr = gsi_stmt (incr_gsi);
8046
8047 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8048
8049 alias_off = build_int_cst (ref_type, 0);
8050 stmt_vec_info next_stmt_info = first_stmt_info;
8051 for (g = 0; g < group_size; g++)
8052 {
8053 running_off = offvar;
8054 if (g)
8055 {
8056 tree size = TYPE_SIZE_UNIT (ltype);
8057 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8058 size);
8059 tree newoff = copy_ssa_name (running_off, NULL);
8060 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8061 running_off, pos);
8062 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8063 running_off = newoff;
8064 }
8065 if (!slp)
8066 op = vect_get_store_rhs (next_stmt_info);
8067 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8068 op, &vec_oprnds);
8069 unsigned int group_el = 0;
8070 unsigned HOST_WIDE_INT
8071 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8072 for (j = 0; j < ncopies; j++)
8073 {
8074 vec_oprnd = vec_oprnds[j];
8075 /* Pun the vector to extract from if necessary. */
8076 if (lvectype != vectype)
8077 {
8078 tree tem = make_ssa_name (lvectype);
8079 gimple *pun
8080 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8081 lvectype, vec_oprnd));
8082 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8083 vec_oprnd = tem;
8084 }
8085 for (i = 0; i < nstores; i++)
8086 {
8087 tree newref, newoff;
8088 gimple *incr, *assign;
8089 tree size = TYPE_SIZE (ltype);
8090 /* Extract the i'th component. */
8091 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8092 bitsize_int (i), size);
8093 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8094 size, pos);
8095
8096 elem = force_gimple_operand_gsi (gsi, elem, true,
8097 NULL_TREE, true,
8098 GSI_SAME_STMT);
8099
8100 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8101 group_el * elsz);
8102 newref = build2 (MEM_REF, ltype,
8103 running_off, this_off);
8104 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8105
8106 /* And store it to *running_off. */
8107 assign = gimple_build_assign (newref, elem);
8108 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8109
8110 group_el += lnel;
8111 if (! slp
8112 || group_el == group_size)
8113 {
8114 newoff = copy_ssa_name (running_off, NULL);
8115 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8116 running_off, stride_step);
8117 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8118
8119 running_off = newoff;
8120 group_el = 0;
8121 }
8122 if (g == group_size - 1
8123 && !slp)
8124 {
8125 if (j == 0 && i == 0)
8126 *vec_stmt = assign;
8127 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8128 }
8129 }
8130 }
8131 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8132 vec_oprnds.release ();
8133 if (slp)
8134 break;
8135 }
8136
8137 return true;
8138 }
8139
8140 auto_vec<tree> dr_chain (group_size);
8141 oprnds.create (group_size);
8142
8143 gcc_assert (alignment_support_scheme);
8144 vec_loop_masks *loop_masks
8145 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8146 ? &LOOP_VINFO_MASKS (loop_vinfo)
8147 : NULL);
8148 vec_loop_lens *loop_lens
8149 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8150 ? &LOOP_VINFO_LENS (loop_vinfo)
8151 : NULL);
8152
8153 /* Shouldn't go with length-based approach if fully masked. */
8154 gcc_assert (!loop_lens || !loop_masks);
8155
8156 /* Targets with store-lane instructions must not require explicit
8157 realignment. vect_supportable_dr_alignment always returns either
8158 dr_aligned or dr_unaligned_supported for masked operations. */
8159 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8160 && !mask
8161 && !loop_masks)
8162 || alignment_support_scheme == dr_aligned
8163 || alignment_support_scheme == dr_unaligned_supported);
8164
8165 tree offset = NULL_TREE;
8166 if (!known_eq (poffset, 0))
8167 offset = size_int (poffset);
8168
8169 tree bump;
8170 tree vec_offset = NULL_TREE;
8171 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8172 {
8173 aggr_type = NULL_TREE;
8174 bump = NULL_TREE;
8175 }
8176 else if (memory_access_type == VMAT_GATHER_SCATTER)
8177 {
8178 aggr_type = elem_type;
8179 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8180 &bump, &vec_offset);
8181 }
8182 else
8183 {
8184 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8185 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8186 else
8187 aggr_type = vectype;
8188 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8189 memory_access_type);
8190 }
8191
8192 if (mask)
8193 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8194
8195 /* In case the vectorization factor (VF) is bigger than the number
8196 of elements that we can fit in a vectype (nunits), we have to generate
8197      more than one vector stmt, i.e. we need to "unroll" the
8198 vector stmt by a factor VF/nunits. */
8199
8200 /* In case of interleaving (non-unit grouped access):
8201
8202 S1: &base + 2 = x2
8203 S2: &base = x0
8204 S3: &base + 1 = x1
8205 S4: &base + 3 = x3
8206
8207 We create vectorized stores starting from base address (the access of the
8208 first stmt in the chain (S2 in the above example), when the last store stmt
8209 of the chain (S4) is reached:
8210
8211 VS1: &base = vx2
8212 VS2: &base + vec_size*1 = vx0
8213 VS3: &base + vec_size*2 = vx1
8214 VS4: &base + vec_size*3 = vx3
8215
8216 Then permutation statements are generated:
8217
8218 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8219 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8220 ...
8221
8222 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8223 (the order of the data-refs in the output of vect_permute_store_chain
8224 corresponds to the order of scalar stmts in the interleaving chain - see
8225 the documentation of vect_permute_store_chain()).
8226
8227      In case of both multiple types and interleaving, the above vector stores
8228      and permutation stmts are created for every copy.  The result vector stmts are
8229 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8230 STMT_VINFO_RELATED_STMT for the next copies.
8231 */
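  /* When the loop is fully masked or fully length-controlled, the stores
     emitted below are additionally predicated: IFN_MASK_STORE or
     IFN_MASK_STORE_LANES with the loop mask (combined with any explicit
     MASK), or IFN_LEN_STORE with the loop length.  */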
8232
8233 auto_vec<tree> vec_masks;
8234 tree vec_mask = NULL;
8235 auto_vec<tree> vec_offsets;
8236 auto_vec<vec<tree> > gvec_oprnds;
8237 gvec_oprnds.safe_grow_cleared (group_size, true);
8238 for (j = 0; j < ncopies; j++)
8239 {
8240 gimple *new_stmt;
8241 if (j == 0)
8242 {
8243 if (slp)
8244 {
8245 /* Get vectorized arguments for SLP_NODE. */
8246 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8247 op, &vec_oprnds);
8248 vec_oprnd = vec_oprnds[0];
8249 }
8250 else
8251 {
8252 /* For interleaved stores we collect vectorized defs for all the
8253 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8254 used as an input to vect_permute_store_chain().
8255
8256 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8257 and OPRNDS are of size 1. */
8258 stmt_vec_info next_stmt_info = first_stmt_info;
8259 for (i = 0; i < group_size; i++)
8260 {
8261 /* Since gaps are not supported for interleaved stores,
8262 DR_GROUP_SIZE is the exact number of stmts in the chain.
8263 		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there
8264 		     is no interleaving, DR_GROUP_SIZE is 1, and only one
8265 		     iteration of the loop will be executed.  */
8266 op = vect_get_store_rhs (next_stmt_info);
8267 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8268 ncopies, op, &gvec_oprnds[i]);
8269 vec_oprnd = gvec_oprnds[i][0];
8270 dr_chain.quick_push (gvec_oprnds[i][0]);
8271 oprnds.quick_push (gvec_oprnds[i][0]);
8272 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8273 }
8274 if (mask)
8275 {
8276 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8277 mask, &vec_masks, mask_vectype);
8278 vec_mask = vec_masks[0];
8279 }
8280 }
8281
8282 	  /* We should have caught mismatched types earlier.  */
8283 gcc_assert (useless_type_conversion_p (vectype,
8284 TREE_TYPE (vec_oprnd)));
8285 bool simd_lane_access_p
8286 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8287 if (simd_lane_access_p
8288 && !loop_masks
8289 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8290 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8291 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8292 && integer_zerop (DR_INIT (first_dr_info->dr))
8293 && alias_sets_conflict_p (get_alias_set (aggr_type),
8294 get_alias_set (TREE_TYPE (ref_type))))
8295 {
8296 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8297 dataref_offset = build_int_cst (ref_type, 0);
8298 }
8299 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8300 {
8301 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8302 slp_node, &gs_info, &dataref_ptr,
8303 &vec_offsets);
8304 vec_offset = vec_offsets[0];
8305 }
8306 else
8307 dataref_ptr
8308 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8309 simd_lane_access_p ? loop : NULL,
8310 offset, &dummy, gsi, &ptr_incr,
8311 simd_lane_access_p, bump);
8312 }
8313 else
8314 {
8315 /* For interleaved stores we created vectorized defs for all the
8316 defs stored in OPRNDS in the previous iteration (previous copy).
8317 DR_CHAIN is then used as an input to vect_permute_store_chain().
8318 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8319 OPRNDS are of size 1. */
8320 for (i = 0; i < group_size; i++)
8321 {
8322 vec_oprnd = gvec_oprnds[i][j];
8323 dr_chain[i] = gvec_oprnds[i][j];
8324 oprnds[i] = gvec_oprnds[i][j];
8325 }
8326 if (mask)
8327 vec_mask = vec_masks[j];
8328 if (dataref_offset)
8329 dataref_offset
8330 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8331 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8332 vec_offset = vec_offsets[j];
8333 else
8334 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8335 stmt_info, bump);
8336 }
8337
8338 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8339 {
8340 tree vec_array;
8341
8342 /* Get an array into which we can store the individual vectors. */
8343 vec_array = create_vector_array (vectype, vec_num);
8344
8345 /* Invalidate the current contents of VEC_ARRAY. This should
8346 become an RTL clobber too, which prevents the vector registers
8347 from being upward-exposed. */
8348 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8349
8350 /* Store the individual vectors into the array. */
8351 for (i = 0; i < vec_num; i++)
8352 {
8353 vec_oprnd = dr_chain[i];
8354 write_vector_array (vinfo, stmt_info,
8355 gsi, vec_oprnd, vec_array, i);
8356 }
8357
8358 tree final_mask = NULL;
8359 if (loop_masks)
8360 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8361 vectype, j);
8362 if (vec_mask)
8363 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8364 final_mask, vec_mask, gsi);
8365
8366 gcall *call;
8367 if (final_mask)
8368 {
8369 /* Emit:
8370 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8371 VEC_ARRAY). */
8372 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8373 tree alias_ptr = build_int_cst (ref_type, align);
8374 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8375 dataref_ptr, alias_ptr,
8376 final_mask, vec_array);
8377 }
8378 else
8379 {
8380 /* Emit:
8381 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8382 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8383 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8384 vec_array);
8385 gimple_call_set_lhs (call, data_ref);
8386 }
8387 gimple_call_set_nothrow (call, true);
8388 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8389 new_stmt = call;
8390
8391 /* Record that VEC_ARRAY is now dead. */
8392 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8393 }
8394 else
8395 {
8396 new_stmt = NULL;
8397 if (grouped_store)
8398 {
8399 if (j == 0)
8400 result_chain.create (group_size);
8401 /* Permute. */
8402 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8403 gsi, &result_chain);
8404 }
8405
8406 stmt_vec_info next_stmt_info = first_stmt_info;
8407 for (i = 0; i < vec_num; i++)
8408 {
8409 unsigned misalign;
8410 unsigned HOST_WIDE_INT align;
8411
8412 tree final_mask = NULL_TREE;
8413 if (loop_masks)
8414 final_mask = vect_get_loop_mask (gsi, loop_masks,
8415 vec_num * ncopies,
8416 vectype, vec_num * j + i);
8417 if (vec_mask)
8418 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8419 final_mask, vec_mask, gsi);
8420
8421 if (memory_access_type == VMAT_GATHER_SCATTER)
8422 {
8423 tree scale = size_int (gs_info.scale);
8424 gcall *call;
8425 if (final_mask)
8426 call = gimple_build_call_internal
8427 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8428 scale, vec_oprnd, final_mask);
8429 else
8430 call = gimple_build_call_internal
8431 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8432 scale, vec_oprnd);
8433 gimple_call_set_nothrow (call, true);
8434 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8435 new_stmt = call;
8436 break;
8437 }
8438
8439 if (i > 0)
8440 /* Bump the vector pointer. */
8441 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8442 gsi, stmt_info, bump);
8443
8444 if (slp)
8445 vec_oprnd = vec_oprnds[i];
8446 else if (grouped_store)
8447 /* For grouped stores vectorized defs are interleaved in
8448 vect_permute_store_chain(). */
8449 vec_oprnd = result_chain[i];
8450
8451 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8452 if (alignment_support_scheme == dr_aligned)
8453 misalign = 0;
8454 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8455 {
8456 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8457 misalign = 0;
8458 }
8459 else
8460 misalign = misalignment;
8461 if (dataref_offset == NULL_TREE
8462 && TREE_CODE (dataref_ptr) == SSA_NAME)
8463 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8464 misalign);
8465 align = least_bit_hwi (misalign | align);
8466
8467 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8468 {
8469 tree perm_mask = perm_mask_for_reverse (vectype);
8470 tree perm_dest = vect_create_destination_var
8471 (vect_get_store_rhs (stmt_info), vectype);
8472 tree new_temp = make_ssa_name (perm_dest);
8473
8474 /* Generate the permute statement. */
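/* For illustration (assuming a 4-element VECTYPE): perm_mask_for_reverse
   yields the selector { 3, 2, 1, 0 }, so the VEC_PERM_EXPR below turns
   { v0, v1, v2, v3 } into { v3, v2, v1, v0 } before the store, matching
   the negative-step access.  */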
8475 gimple *perm_stmt
8476 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8477 vec_oprnd, perm_mask);
8478 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8479
8480 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8481 vec_oprnd = new_temp;
8482 }
8483
8484 /* Arguments are ready. Create the new vector stmt. */
8485 if (final_mask)
8486 {
8487 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8488 gcall *call
8489 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8490 dataref_ptr, ptr,
8491 final_mask, vec_oprnd);
8492 gimple_call_set_nothrow (call, true);
8493 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8494 new_stmt = call;
8495 }
8496 else if (loop_lens)
8497 {
8498 tree final_len
8499 = vect_get_loop_len (loop_vinfo, loop_lens,
8500 vec_num * ncopies, vec_num * j + i);
8501 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8502 machine_mode vmode = TYPE_MODE (vectype);
8503 opt_machine_mode new_ovmode
8504 = get_len_load_store_mode (vmode, false);
8505 machine_mode new_vmode = new_ovmode.require ();
8506 /* Need conversion if it's wrapped with VnQI. */
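/* For example (assuming a target whose LEN_STORE operates on V16QI while
   VECTYPE is V4SI): the code below emits
     var = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vec_oprnd);
   and passes VAR to .LEN_STORE, so the stored bits are unchanged and only
   reinterpreted in the QI-element mode the target expects.  */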
8507 if (vmode != new_vmode)
8508 {
8509 tree new_vtype
8510 = build_vector_type_for_mode (unsigned_intQI_type_node,
8511 new_vmode);
8512 tree var
8513 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8514 vec_oprnd
8515 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8516 gassign *new_stmt
8517 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8518 vec_oprnd);
8519 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8520 gsi);
8521 vec_oprnd = var;
8522 }
8523
8524 signed char biasval =
8525 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8526
8527 tree bias = build_int_cst (intQI_type_node, biasval);
8528 gcall *call
8529 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8530 ptr, final_len, vec_oprnd,
8531 bias);
8532 gimple_call_set_nothrow (call, true);
8533 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8534 new_stmt = call;
8535 }
8536 else
8537 {
8538 data_ref = fold_build2 (MEM_REF, vectype,
8539 dataref_ptr,
8540 dataref_offset
8541 ? dataref_offset
8542 : build_int_cst (ref_type, 0));
8543 if (alignment_support_scheme == dr_aligned)
8544 ;
8545 else
8546 TREE_TYPE (data_ref)
8547 = build_aligned_type (TREE_TYPE (data_ref),
8548 align * BITS_PER_UNIT);
8549 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8550 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8551 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8552 }
8553
8554 if (slp)
8555 continue;
8556
8557 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8558 if (!next_stmt_info)
8559 break;
8560 }
8561 }
8562 if (!slp)
8563 {
8564 if (j == 0)
8565 *vec_stmt = new_stmt;
8566 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8567 }
8568 }
8569
8570 for (i = 0; i < group_size; ++i)
8571 {
8572 vec<tree> oprndsi = gvec_oprnds[i];
8573 oprndsi.release ();
8574 }
8575 oprnds.release ();
8576 result_chain.release ();
8577 vec_oprnds.release ();
8578
8579 return true;
8580 }
8581
8582 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8583 VECTOR_CST mask. No checks are made that the target platform supports the
8584 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8585 vect_gen_perm_mask_checked. */
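/* A small illustration (example values): for a 4-element VECTYPE and
   SEL = { 1, 0, 3, 2 } this returns the VECTOR_CST { 1, 0, 3, 2 } of type
   vector(4) ssizetype, suitable as the selector of a VEC_PERM_EXPR.  */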
8586
8587 tree
8588 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8589 {
8590 tree mask_type;
8591
8592 poly_uint64 nunits = sel.length ();
8593 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8594
8595 mask_type = build_vector_type (ssizetype, nunits);
8596 return vec_perm_indices_to_tree (mask_type, sel);
8597 }
8598
8599 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8600 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8601
8602 tree
8603 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8604 {
8605 machine_mode vmode = TYPE_MODE (vectype);
8606 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8607 return vect_gen_perm_mask_any (vectype, sel);
8608 }
8609
8610 /* Given vector variables X and Y that were generated for the scalar
8611 STMT_INFO, generate instructions to permute the vector elements of X and Y
8612 using permutation mask MASK_VEC, insert them at *GSI and return the
8613 permuted vector variable. */
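/* A sketch of the semantics (example values): with
   X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
   MASK_VEC = { 0, 4, 1, 5 }, the emitted VEC_PERM_EXPR selects from the
   concatenation of X and Y and yields { x0, y0, x1, y1 }.  */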
8614
8615 static tree
8616 permute_vec_elements (vec_info *vinfo,
8617 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8618 gimple_stmt_iterator *gsi)
8619 {
8620 tree vectype = TREE_TYPE (x);
8621 tree perm_dest, data_ref;
8622 gimple *perm_stmt;
8623
8624 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8625 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8626 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8627 else
8628 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8629 data_ref = make_ssa_name (perm_dest);
8630
8631 /* Generate the permute statement. */
8632 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8633 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8634
8635 return data_ref;
8636 }
8637
8638 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8639 inserting them on the loop's preheader edge.  Returns true if we
8640 were successful in doing so (and thus STMT_INFO can then be moved),
8641 otherwise returns false. */
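/* For instance (a hypothetical snippet): if STMT_INFO is the invariant
   load _2 = *p_1 and p_1 is defined inside LOOP by p_1 = &s.f, whose own
   operands are all defined outside LOOP, then the definition of p_1 is
   moved to the preheader so the caller can hoist the load itself.  A PHI
   definition or a deeper use web makes us give up.  */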
8642
8643 static bool
8644 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8645 {
8646 ssa_op_iter i;
8647 tree op;
8648 bool any = false;
8649
8650 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8651 {
8652 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8653 if (!gimple_nop_p (def_stmt)
8654 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8655 {
8656 /* Make sure we don't need to recurse. While we could do
8657 so in simple cases when there are more complex use webs
8658 we don't have an easy way to preserve stmt order to fulfil
8659 dependencies within them. */
8660 tree op2;
8661 ssa_op_iter i2;
8662 if (gimple_code (def_stmt) == GIMPLE_PHI)
8663 return false;
8664 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8665 {
8666 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8667 if (!gimple_nop_p (def_stmt2)
8668 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8669 return false;
8670 }
8671 any = true;
8672 }
8673 }
8674
8675 if (!any)
8676 return true;
8677
8678 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8679 {
8680 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8681 if (!gimple_nop_p (def_stmt)
8682 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8683 {
8684 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8685 gsi_remove (&gsi, false);
8686 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8687 }
8688 }
8689
8690 return true;
8691 }
8692
8693 /* vectorizable_load.
8694
8695 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8696 that can be vectorized.
8697 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8698 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8699 Return true if STMT_INFO is vectorizable in this way. */
8700
8701 static bool
8702 vectorizable_load (vec_info *vinfo,
8703 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8704 gimple **vec_stmt, slp_tree slp_node,
8705 stmt_vector_for_cost *cost_vec)
8706 {
8707 tree scalar_dest;
8708 tree vec_dest = NULL;
8709 tree data_ref = NULL;
8710 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8711 class loop *loop = NULL;
8712 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8713 bool nested_in_vect_loop = false;
8714 tree elem_type;
8715 tree new_temp;
8716 machine_mode mode;
8717 tree dummy;
8718 tree dataref_ptr = NULL_TREE;
8719 tree dataref_offset = NULL_TREE;
8720 gimple *ptr_incr = NULL;
8721 int ncopies;
8722 int i, j;
8723 unsigned int group_size;
8724 poly_uint64 group_gap_adj;
8725 tree msq = NULL_TREE, lsq;
8726 tree realignment_token = NULL_TREE;
8727 gphi *phi = NULL;
8728 vec<tree> dr_chain = vNULL;
8729 bool grouped_load = false;
8730 stmt_vec_info first_stmt_info;
8731 stmt_vec_info first_stmt_info_for_drptr = NULL;
8732 bool compute_in_loop = false;
8733 class loop *at_loop;
8734 int vec_num;
8735 bool slp = (slp_node != NULL);
8736 bool slp_perm = false;
8737 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8738 poly_uint64 vf;
8739 tree aggr_type;
8740 gather_scatter_info gs_info;
8741 tree ref_type;
8742 enum vect_def_type mask_dt = vect_unknown_def_type;
8743
8744 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8745 return false;
8746
8747 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8748 && ! vec_stmt)
8749 return false;
8750
8751 if (!STMT_VINFO_DATA_REF (stmt_info))
8752 return false;
8753
8754 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8755 int mask_index = -1;
8756 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8757 {
8758 scalar_dest = gimple_assign_lhs (assign);
8759 if (TREE_CODE (scalar_dest) != SSA_NAME)
8760 return false;
8761
8762 tree_code code = gimple_assign_rhs_code (assign);
8763 if (code != ARRAY_REF
8764 && code != BIT_FIELD_REF
8765 && code != INDIRECT_REF
8766 && code != COMPONENT_REF
8767 && code != IMAGPART_EXPR
8768 && code != REALPART_EXPR
8769 && code != MEM_REF
8770 && TREE_CODE_CLASS (code) != tcc_declaration)
8771 return false;
8772 }
8773 else
8774 {
8775 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8776 if (!call || !gimple_call_internal_p (call))
8777 return false;
8778
8779 internal_fn ifn = gimple_call_internal_fn (call);
8780 if (!internal_load_fn_p (ifn))
8781 return false;
8782
8783 scalar_dest = gimple_call_lhs (call);
8784 if (!scalar_dest)
8785 return false;
8786
8787 mask_index = internal_fn_mask_index (ifn);
8788 /* ??? For SLP the mask operand is always last. */
8789 if (mask_index >= 0 && slp_node)
8790 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8791 if (mask_index >= 0
8792 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8793 &mask, NULL, &mask_dt, &mask_vectype))
8794 return false;
8795 }
8796
8797 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8798 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8799
8800 if (loop_vinfo)
8801 {
8802 loop = LOOP_VINFO_LOOP (loop_vinfo);
8803 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8804 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8805 }
8806 else
8807 vf = 1;
8808
8809 /* Multiple types in SLP are handled by creating the appropriate number of
8810 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8811 case of SLP. */
8812 if (slp)
8813 ncopies = 1;
8814 else
8815 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8816
8817 gcc_assert (ncopies >= 1);
8818
8819 /* FORNOW. This restriction should be relaxed. */
8820 if (nested_in_vect_loop && ncopies > 1)
8821 {
8822 if (dump_enabled_p ())
8823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8824 "multiple types in nested loop.\n");
8825 return false;
8826 }
8827
8828 /* Invalidate assumptions made by dependence analysis when vectorization
8829 on the unrolled body effectively re-orders stmts. */
8830 if (ncopies > 1
8831 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8832 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8833 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8834 {
8835 if (dump_enabled_p ())
8836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8837 "cannot perform implicit CSE when unrolling "
8838 "with negative dependence distance\n");
8839 return false;
8840 }
8841
8842 elem_type = TREE_TYPE (vectype);
8843 mode = TYPE_MODE (vectype);
8844
8845 /* FORNOW.  In some cases we can vectorize even if the data type is not
8846 supported (e.g. data copies). */
8847 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8848 {
8849 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8851 "Aligned load, but unsupported type.\n");
8852 return false;
8853 }
8854
8855 /* Check if the load is a part of an interleaving chain. */
8856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8857 {
8858 grouped_load = true;
8859 /* FORNOW */
8860 gcc_assert (!nested_in_vect_loop);
8861 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8862
8863 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8864 group_size = DR_GROUP_SIZE (first_stmt_info);
8865
8866 /* Refuse non-SLP vectorization of SLP-only groups. */
8867 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8868 {
8869 if (dump_enabled_p ())
8870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8871 "cannot vectorize load in non-SLP mode.\n");
8872 return false;
8873 }
8874
8875 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8876 {
8877 slp_perm = true;
8878
8879 if (!loop_vinfo)
8880 {
8881 /* In BB vectorization we may not actually use a loaded vector
8882 accessing elements in excess of DR_GROUP_SIZE. */
8883 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8884 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8885 unsigned HOST_WIDE_INT nunits;
8886 unsigned j, k, maxk = 0;
8887 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8888 if (k > maxk)
8889 maxk = k;
8890 tree vectype = SLP_TREE_VECTYPE (slp_node);
8891 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8892 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8893 {
8894 if (dump_enabled_p ())
8895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8896 "BB vectorization with gaps at the end of "
8897 "a load is not supported\n");
8898 return false;
8899 }
8900 }
8901
8902 auto_vec<tree> tem;
8903 unsigned n_perms;
8904 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8905 true, &n_perms))
8906 {
8907 if (dump_enabled_p ())
8908 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8909 vect_location,
8910 "unsupported load permutation\n");
8911 return false;
8912 }
8913 }
8914
8915 /* Invalidate assumptions made by dependence analysis when vectorization
8916 on the unrolled body effectively re-orders stmts. */
8917 if (!PURE_SLP_STMT (stmt_info)
8918 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8919 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8920 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8921 {
8922 if (dump_enabled_p ())
8923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8924 "cannot perform implicit CSE when performing "
8925 "group loads with negative dependence distance\n");
8926 return false;
8927 }
8928 }
8929 else
8930 group_size = 1;
8931
8932 vect_memory_access_type memory_access_type;
8933 enum dr_alignment_support alignment_support_scheme;
8934 int misalignment;
8935 poly_int64 poffset;
8936 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8937 ncopies, &memory_access_type, &poffset,
8938 &alignment_support_scheme, &misalignment, &gs_info))
8939 return false;
8940
8941 if (mask)
8942 {
8943 if (memory_access_type == VMAT_CONTIGUOUS)
8944 {
8945 machine_mode vec_mode = TYPE_MODE (vectype);
8946 if (!VECTOR_MODE_P (vec_mode)
8947 || !can_vec_mask_load_store_p (vec_mode,
8948 TYPE_MODE (mask_vectype), true))
8949 return false;
8950 }
8951 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8952 && memory_access_type != VMAT_GATHER_SCATTER)
8953 {
8954 if (dump_enabled_p ())
8955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8956 "unsupported access type for masked load.\n");
8957 return false;
8958 }
8959 else if (memory_access_type == VMAT_GATHER_SCATTER
8960 && gs_info.ifn == IFN_LAST
8961 && !gs_info.decl)
8962 {
8963 if (dump_enabled_p ())
8964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8965 "unsupported masked emulated gather.\n");
8966 return false;
8967 }
8968 }
8969
8970 if (!vec_stmt) /* transformation not required. */
8971 {
8972 if (slp_node
8973 && mask
8974 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8975 mask_vectype))
8976 {
8977 if (dump_enabled_p ())
8978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8979 "incompatible vector types for invariants\n");
8980 return false;
8981 }
8982
8983 if (!slp)
8984 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8985
8986 if (loop_vinfo
8987 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8988 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8989 VLS_LOAD, group_size,
8990 memory_access_type, &gs_info,
8991 mask);
8992
8993 if (dump_enabled_p ()
8994 && memory_access_type != VMAT_ELEMENTWISE
8995 && memory_access_type != VMAT_GATHER_SCATTER
8996 && alignment_support_scheme != dr_aligned)
8997 dump_printf_loc (MSG_NOTE, vect_location,
8998 "Vectorizing an unaligned access.\n");
8999
9000 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9001 vinfo->any_known_not_updated_vssa = true;
9002
9003 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9004 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9005 alignment_support_scheme, misalignment,
9006 &gs_info, slp_node, cost_vec);
9007 return true;
9008 }
9009
9010 if (!slp)
9011 gcc_assert (memory_access_type
9012 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9013
9014 if (dump_enabled_p ())
9015 dump_printf_loc (MSG_NOTE, vect_location,
9016 "transform load. ncopies = %d\n", ncopies);
9017
9018 /* Transform. */
9019
9020 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9021 ensure_base_align (dr_info);
9022
9023 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9024 {
9025 vect_build_gather_load_calls (vinfo,
9026 stmt_info, gsi, vec_stmt, &gs_info, mask);
9027 return true;
9028 }
9029
9030 if (memory_access_type == VMAT_INVARIANT)
9031 {
9032 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9033 /* If we have versioned for aliasing or the loop doesn't
9034 have any data dependencies that would preclude this,
9035 then we are sure this is a loop invariant load and
9036 thus we can insert it on the preheader edge. */
9037 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9038 && !nested_in_vect_loop
9039 && hoist_defs_of_uses (stmt_info, loop));
9040 if (hoist_p)
9041 {
9042 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9043 if (dump_enabled_p ())
9044 dump_printf_loc (MSG_NOTE, vect_location,
9045 "hoisting out of the vectorized loop: %G",
9046 (gimple *) stmt);
9047 scalar_dest = copy_ssa_name (scalar_dest);
9048 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9049 edge pe = loop_preheader_edge (loop);
9050 gphi *vphi = get_virtual_phi (loop->header);
9051 tree vuse;
9052 if (vphi)
9053 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9054 else
9055 vuse = gimple_vuse (gsi_stmt (*gsi));
9056 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9057 gimple_set_vuse (new_stmt, vuse);
9058 gsi_insert_on_edge_immediate (pe, new_stmt);
9059 }
9060 /* These copies are all equivalent, but currently the representation
9061 requires a separate STMT_VINFO_VEC_STMT for each one. */
9062 gimple_stmt_iterator gsi2 = *gsi;
9063 gsi_next (&gsi2);
9064 for (j = 0; j < ncopies; j++)
9065 {
9066 if (hoist_p)
9067 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9068 vectype, NULL);
9069 else
9070 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9071 vectype, &gsi2);
9072 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9073 if (slp)
9074 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9075 else
9076 {
9077 if (j == 0)
9078 *vec_stmt = new_stmt;
9079 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9080 }
9081 }
9082 return true;
9083 }
9084
9085 if (memory_access_type == VMAT_ELEMENTWISE
9086 || memory_access_type == VMAT_STRIDED_SLP)
9087 {
9088 gimple_stmt_iterator incr_gsi;
9089 bool insert_after;
9090 tree offvar;
9091 tree ivstep;
9092 tree running_off;
9093 vec<constructor_elt, va_gc> *v = NULL;
9094 tree stride_base, stride_step, alias_off;
9095 /* Checked by get_load_store_type. */
9096 unsigned int const_nunits = nunits.to_constant ();
9097 unsigned HOST_WIDE_INT cst_offset = 0;
9098 tree dr_offset;
9099
9100 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9101 gcc_assert (!nested_in_vect_loop);
9102
9103 if (grouped_load)
9104 {
9105 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9106 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9107 }
9108 else
9109 {
9110 first_stmt_info = stmt_info;
9111 first_dr_info = dr_info;
9112 }
9113 if (slp && grouped_load)
9114 {
9115 group_size = DR_GROUP_SIZE (first_stmt_info);
9116 ref_type = get_group_alias_ptr_type (first_stmt_info);
9117 }
9118 else
9119 {
9120 if (grouped_load)
9121 cst_offset
9122 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9123 * vect_get_place_in_interleaving_chain (stmt_info,
9124 first_stmt_info));
9125 group_size = 1;
9126 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9127 }
9128
9129 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9130 stride_base
9131 = fold_build_pointer_plus
9132 (DR_BASE_ADDRESS (first_dr_info->dr),
9133 size_binop (PLUS_EXPR,
9134 convert_to_ptrofftype (dr_offset),
9135 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9136 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9137
9138 /* For a load with loop-invariant (but other than power-of-2)
9139 stride (i.e. not a grouped access) like so:
9140
9141 for (i = 0; i < n; i += stride)
9142 ... = array[i];
9143
9144 we generate a new induction variable and new accesses to
9145 form a new vector (or vectors, depending on ncopies):
9146
9147 for (j = 0; ; j += VF*stride)
9148 tmp1 = array[j];
9149 tmp2 = array[j + stride];
9150 ...
9151 vectemp = {tmp1, tmp2, ...}
9152 */
9153
9154 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9155 build_int_cst (TREE_TYPE (stride_step), vf));
9156
9157 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9158
9159 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9160 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9161 create_iv (stride_base, ivstep, NULL,
9162 loop, &incr_gsi, insert_after,
9163 &offvar, NULL);
9164
9165 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9166
9167 running_off = offvar;
9168 alias_off = build_int_cst (ref_type, 0);
9169 int nloads = const_nunits;
9170 int lnel = 1;
9171 tree ltype = TREE_TYPE (vectype);
9172 tree lvectype = vectype;
9173 auto_vec<tree> dr_chain;
9174 if (memory_access_type == VMAT_STRIDED_SLP)
9175 {
9176 if (group_size < const_nunits)
9177 {
9178 /* First check if vec_init optab supports construction from vector
9179 elts directly. Otherwise avoid emitting a constructor of
9180 vector elements by performing the loads using an integer type
9181 of the same size, constructing a vector of those and then
9182 re-interpreting it as the original vector type. This avoids a
9183 huge runtime penalty due to the general inability to perform
9184 store forwarding from smaller stores to a larger load. */
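/* As an example of the composition (sizes are illustrative only): for a
   V4SI VECTYPE with GROUP_SIZE == 2, vector_vector_composition_type may
   return a two-element VTYPE whose element PTYPE is a 64-bit integer
   (or a V2SI), so the group is loaded with two 64-bit loads, combined
   into VTYPE via a CONSTRUCTOR and finally VIEW_CONVERTed back to
   V4SI.  */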
9185 tree ptype;
9186 tree vtype
9187 = vector_vector_composition_type (vectype,
9188 const_nunits / group_size,
9189 &ptype);
9190 if (vtype != NULL_TREE)
9191 {
9192 nloads = const_nunits / group_size;
9193 lnel = group_size;
9194 lvectype = vtype;
9195 ltype = ptype;
9196 }
9197 }
9198 else
9199 {
9200 nloads = 1;
9201 lnel = const_nunits;
9202 ltype = vectype;
9203 }
9204 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9205 }
9206 /* If the vectype has just one element, load the whole vector(1) scalar_type at once.  */
9207 else if (nloads == 1)
9208 ltype = vectype;
9209
9210 if (slp)
9211 {
9212 /* For SLP permutation support we need to load the whole group,
9213 not only the number of vector stmts the permutation result
9214 fits in. */
9215 if (slp_perm)
9216 {
9217 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9218 variable VF. */
9219 unsigned int const_vf = vf.to_constant ();
9220 ncopies = CEIL (group_size * const_vf, const_nunits);
9221 dr_chain.create (ncopies);
9222 }
9223 else
9224 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9225 }
9226 unsigned int group_el = 0;
9227 unsigned HOST_WIDE_INT
9228 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9229 for (j = 0; j < ncopies; j++)
9230 {
9231 if (nloads > 1)
9232 vec_alloc (v, nloads);
9233 gimple *new_stmt = NULL;
9234 for (i = 0; i < nloads; i++)
9235 {
9236 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9237 group_el * elsz + cst_offset);
9238 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9239 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9240 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9241 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9242 if (nloads > 1)
9243 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9244 gimple_assign_lhs (new_stmt));
9245
9246 group_el += lnel;
9247 if (! slp
9248 || group_el == group_size)
9249 {
9250 tree newoff = copy_ssa_name (running_off);
9251 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9252 running_off, stride_step);
9253 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9254
9255 running_off = newoff;
9256 group_el = 0;
9257 }
9258 }
9259 if (nloads > 1)
9260 {
9261 tree vec_inv = build_constructor (lvectype, v);
9262 new_temp = vect_init_vector (vinfo, stmt_info,
9263 vec_inv, lvectype, gsi);
9264 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9265 if (lvectype != vectype)
9266 {
9267 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9268 VIEW_CONVERT_EXPR,
9269 build1 (VIEW_CONVERT_EXPR,
9270 vectype, new_temp));
9271 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9272 }
9273 }
9274
9275 if (slp)
9276 {
9277 if (slp_perm)
9278 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9279 else
9280 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9281 }
9282 else
9283 {
9284 if (j == 0)
9285 *vec_stmt = new_stmt;
9286 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9287 }
9288 }
9289 if (slp_perm)
9290 {
9291 unsigned n_perms;
9292 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9293 false, &n_perms);
9294 }
9295 return true;
9296 }
9297
9298 if (memory_access_type == VMAT_GATHER_SCATTER
9299 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9300 grouped_load = false;
9301
9302 if (grouped_load)
9303 {
9304 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9305 group_size = DR_GROUP_SIZE (first_stmt_info);
9306 /* For SLP vectorization we directly vectorize a subchain
9307 without permutation. */
9308 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9309 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9310 /* For BB vectorization always use the first stmt to base
9311 the data ref pointer on. */
9312 if (bb_vinfo)
9313 first_stmt_info_for_drptr
9314 = vect_find_first_scalar_stmt_in_slp (slp_node);
9315
9316 /* Check if the chain of loads is already vectorized. */
9317 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9318 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9319 ??? But we can only do so if there is exactly one
9320 as we have no way to get at the rest. Leave the CSE
9321 opportunity alone.
9322 ??? With the group load eventually participating
9323 in multiple different permutations (having multiple
9324 slp nodes which refer to the same group) the CSE
9325 is even wrong code. See PR56270. */
9326 && !slp)
9327 {
9328 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9329 return true;
9330 }
9331 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9332 group_gap_adj = 0;
9333
9334 /* VEC_NUM is the number of vect stmts to be created for this group. */
9335 if (slp)
9336 {
9337 grouped_load = false;
9338 /* If an SLP permutation is from N elements to N elements,
9339 and if one vector holds a whole number of N, we can load
9340 the inputs to the permutation in the same way as an
9341 unpermuted sequence. In other cases we need to load the
9342 whole group, not only the number of vector stmts the
9343 permutation result fits in. */
9344 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9345 if (slp_perm
9346 && (group_size != scalar_lanes
9347 || !multiple_p (nunits, group_size)))
9348 {
9349 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9350 variable VF; see vect_transform_slp_perm_load. */
9351 unsigned int const_vf = vf.to_constant ();
9352 unsigned int const_nunits = nunits.to_constant ();
9353 vec_num = CEIL (group_size * const_vf, const_nunits);
9354 group_gap_adj = vf * group_size - nunits * vec_num;
9355 }
9356 else
9357 {
9358 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9359 group_gap_adj
9360 = group_size - scalar_lanes;
9361 }
9362 }
9363 else
9364 vec_num = group_size;
9365
9366 ref_type = get_group_alias_ptr_type (first_stmt_info);
9367 }
9368 else
9369 {
9370 first_stmt_info = stmt_info;
9371 first_dr_info = dr_info;
9372 group_size = vec_num = 1;
9373 group_gap_adj = 0;
9374 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9375 if (slp)
9376 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9377 }
9378
9379 gcc_assert (alignment_support_scheme);
9380 vec_loop_masks *loop_masks
9381 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9382 ? &LOOP_VINFO_MASKS (loop_vinfo)
9383 : NULL);
9384 vec_loop_lens *loop_lens
9385 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9386 ? &LOOP_VINFO_LENS (loop_vinfo)
9387 : NULL);
9388
9389 /* Shouldn't go with length-based approach if fully masked. */
9390 gcc_assert (!loop_lens || !loop_masks);
9391
9392 /* Targets with store-lane instructions must not require explicit
9393 realignment. vect_supportable_dr_alignment always returns either
9394 dr_aligned or dr_unaligned_supported for masked operations. */
9395 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9396 && !mask
9397 && !loop_masks)
9398 || alignment_support_scheme == dr_aligned
9399 || alignment_support_scheme == dr_unaligned_supported);
9400
9401 /* In case the vectorization factor (VF) is bigger than the number
9402 of elements that we can fit in a vectype (nunits), we have to generate
9403 more than one vector stmt, i.e. we need to "unroll" the
9404 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9405 from one copy of the vector stmt to the next, in the field
9406 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9407 stages to find the correct vector defs to be used when vectorizing
9408 stmts that use the defs of the current stmt. The example below
9409 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9410 need to create 4 vectorized stmts):
9411
9412 before vectorization:
9413 RELATED_STMT VEC_STMT
9414 S1: x = memref - -
9415 S2: z = x + 1 - -
9416
9417 step 1: vectorize stmt S1:
9418 We first create the vector stmt VS1_0, and, as usual, record a
9419 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9420 Next, we create the vector stmt VS1_1, and record a pointer to
9421 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9422 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9423 stmts and pointers:
9424 RELATED_STMT VEC_STMT
9425 VS1_0: vx0 = memref0 VS1_1 -
9426 VS1_1: vx1 = memref1 VS1_2 -
9427 VS1_2: vx2 = memref2 VS1_3 -
9428 VS1_3: vx3 = memref3 - -
9429 S1: x = load - VS1_0
9430 S2: z = x + 1 - -
9431 */
9432
9433 /* In case of interleaving (non-unit grouped access):
9434
9435 S1: x2 = &base + 2
9436 S2: x0 = &base
9437 S3: x1 = &base + 1
9438 S4: x3 = &base + 3
9439
9440 Vectorized loads are created in the order of memory accesses
9441 starting from the access of the first stmt of the chain:
9442
9443 VS1: vx0 = &base
9444 VS2: vx1 = &base + vec_size*1
9445 VS3: vx3 = &base + vec_size*2
9446 VS4: vx4 = &base + vec_size*3
9447
9448 Then permutation statements are generated:
9449
9450 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9451 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9452 ...
9453
9454 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9455 (the order of the data-refs in the output of vect_permute_load_chain
9456 corresponds to the order of scalar stmts in the interleaving chain - see
9457 the documentation of vect_permute_load_chain()).
9458 The generation of permutation stmts and recording them in
9459 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9460
9461 In case of both multiple types and interleaving, the vector loads and
9462 permutation stmts above are created for every copy. The result vector
9463 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9464 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9465
9466 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9467 on a target that supports unaligned accesses (dr_unaligned_supported)
9468 we generate the following code:
9469 p = initial_addr;
9470 indx = 0;
9471 loop {
9472 p = p + indx * vectype_size;
9473 vec_dest = *(p);
9474 indx = indx + 1;
9475 }
9476
9477 Otherwise, the data reference is potentially unaligned on a target that
9478 does not support unaligned accesses (dr_explicit_realign_optimized) -
9479 then generate the following code, in which the data in each iteration is
9480 obtained by two vector loads, one from the previous iteration, and one
9481 from the current iteration:
9482 p1 = initial_addr;
9483 msq_init = *(floor(p1))
9484 p2 = initial_addr + VS - 1;
9485 realignment_token = call target_builtin;
9486 indx = 0;
9487 loop {
9488 p2 = p2 + indx * vectype_size
9489 lsq = *(floor(p2))
9490 vec_dest = realign_load (msq, lsq, realignment_token)
9491 indx = indx + 1;
9492 msq = lsq;
9493 } */
9494
9495 /* If the misalignment remains the same throughout the execution of the
9496 loop, we can create the init_addr and permutation mask at the loop
9497 preheader. Otherwise, it needs to be created inside the loop.
9498 This can only occur when vectorizing memory accesses in the inner-loop
9499 nested within an outer-loop that is being vectorized. */
9500
9501 if (nested_in_vect_loop
9502 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9503 GET_MODE_SIZE (TYPE_MODE (vectype))))
9504 {
9505 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9506 compute_in_loop = true;
9507 }
9508
9509 bool diff_first_stmt_info
9510 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9511
9512 tree offset = NULL_TREE;
9513 if ((alignment_support_scheme == dr_explicit_realign_optimized
9514 || alignment_support_scheme == dr_explicit_realign)
9515 && !compute_in_loop)
9516 {
9517 /* If we have a different first_stmt_info, we can't set up realignment
9518 here, since we can't guarantee that first_stmt_info's DR has been
9519 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9520 by the distance from first_stmt_info's DR as below.  */
9521 if (!diff_first_stmt_info)
9522 msq = vect_setup_realignment (vinfo,
9523 first_stmt_info, gsi, &realignment_token,
9524 alignment_support_scheme, NULL_TREE,
9525 &at_loop);
9526 if (alignment_support_scheme == dr_explicit_realign_optimized)
9527 {
9528 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9529 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9530 size_one_node);
9531 gcc_assert (!first_stmt_info_for_drptr);
9532 }
9533 }
9534 else
9535 at_loop = loop;
9536
9537 if (!known_eq (poffset, 0))
9538 offset = (offset
9539 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9540 : size_int (poffset));
9541
9542 tree bump;
9543 tree vec_offset = NULL_TREE;
9544 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9545 {
9546 aggr_type = NULL_TREE;
9547 bump = NULL_TREE;
9548 }
9549 else if (memory_access_type == VMAT_GATHER_SCATTER)
9550 {
9551 aggr_type = elem_type;
9552 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9553 &bump, &vec_offset);
9554 }
9555 else
9556 {
9557 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9558 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9559 else
9560 aggr_type = vectype;
9561 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9562 memory_access_type);
9563 }
9564
9565 auto_vec<tree> vec_offsets;
9566 auto_vec<tree> vec_masks;
9567 if (mask)
9568 {
9569 if (slp_node)
9570 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9571 &vec_masks);
9572 else
9573 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9574 &vec_masks, mask_vectype);
9575 }
9576 tree vec_mask = NULL_TREE;
9577 poly_uint64 group_elt = 0;
9578 for (j = 0; j < ncopies; j++)
9579 {
9580 /* 1. Create the vector or array pointer update chain. */
9581 if (j == 0)
9582 {
9583 bool simd_lane_access_p
9584 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9585 if (simd_lane_access_p
9586 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9587 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9588 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9589 && integer_zerop (DR_INIT (first_dr_info->dr))
9590 && alias_sets_conflict_p (get_alias_set (aggr_type),
9591 get_alias_set (TREE_TYPE (ref_type)))
9592 && (alignment_support_scheme == dr_aligned
9593 || alignment_support_scheme == dr_unaligned_supported))
9594 {
9595 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9596 dataref_offset = build_int_cst (ref_type, 0);
9597 }
9598 else if (diff_first_stmt_info)
9599 {
9600 dataref_ptr
9601 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9602 aggr_type, at_loop, offset, &dummy,
9603 gsi, &ptr_incr, simd_lane_access_p,
9604 bump);
9605 /* Adjust the pointer by the difference to first_stmt. */
9606 data_reference_p ptrdr
9607 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9608 tree diff
9609 = fold_convert (sizetype,
9610 size_binop (MINUS_EXPR,
9611 DR_INIT (first_dr_info->dr),
9612 DR_INIT (ptrdr)));
9613 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9614 stmt_info, diff);
9615 if (alignment_support_scheme == dr_explicit_realign)
9616 {
9617 msq = vect_setup_realignment (vinfo,
9618 first_stmt_info_for_drptr, gsi,
9619 &realignment_token,
9620 alignment_support_scheme,
9621 dataref_ptr, &at_loop);
9622 gcc_assert (!compute_in_loop);
9623 }
9624 }
9625 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9626 {
9627 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9628 slp_node, &gs_info, &dataref_ptr,
9629 &vec_offsets);
9630 }
9631 else
9632 dataref_ptr
9633 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9634 at_loop,
9635 offset, &dummy, gsi, &ptr_incr,
9636 simd_lane_access_p, bump);
9637 if (mask)
9638 vec_mask = vec_masks[0];
9639 }
9640 else
9641 {
9642 if (dataref_offset)
9643 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9644 bump);
9645 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9646 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9647 stmt_info, bump);
9648 if (mask)
9649 vec_mask = vec_masks[j];
9650 }
9651
9652 if (grouped_load || slp_perm)
9653 dr_chain.create (vec_num);
9654
9655 gimple *new_stmt = NULL;
9656 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9657 {
9658 tree vec_array;
9659
9660 vec_array = create_vector_array (vectype, vec_num);
9661
9662 tree final_mask = NULL_TREE;
9663 if (loop_masks)
9664 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9665 vectype, j);
9666 if (vec_mask)
9667 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9668 final_mask, vec_mask, gsi);
9669
9670 gcall *call;
9671 if (final_mask)
9672 {
9673 /* Emit:
9674 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9675 VEC_MASK). */
9676 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9677 tree alias_ptr = build_int_cst (ref_type, align);
9678 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9679 dataref_ptr, alias_ptr,
9680 final_mask);
9681 }
9682 else
9683 {
9684 /* Emit:
9685 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9686 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9687 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9688 }
9689 gimple_call_set_lhs (call, vec_array);
9690 gimple_call_set_nothrow (call, true);
9691 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9692 new_stmt = call;
9693
9694 /* Extract each vector into an SSA_NAME. */
9695 for (i = 0; i < vec_num; i++)
9696 {
9697 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9698 vec_array, i);
9699 dr_chain.quick_push (new_temp);
9700 }
9701
9702 /* Record the mapping between SSA_NAMEs and statements. */
9703 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9704
9705 /* Record that VEC_ARRAY is now dead. */
9706 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9707 }
9708 else
9709 {
9710 for (i = 0; i < vec_num; i++)
9711 {
9712 tree final_mask = NULL_TREE;
9713 if (loop_masks
9714 && memory_access_type != VMAT_INVARIANT)
9715 final_mask = vect_get_loop_mask (gsi, loop_masks,
9716 vec_num * ncopies,
9717 vectype, vec_num * j + i);
9718 if (vec_mask)
9719 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9720 final_mask, vec_mask, gsi);
9721
9722 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9723 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9724 gsi, stmt_info, bump);
9725
9726 /* 2. Create the vector-load in the loop. */
9727 switch (alignment_support_scheme)
9728 {
9729 case dr_aligned:
9730 case dr_unaligned_supported:
9731 {
9732 unsigned int misalign;
9733 unsigned HOST_WIDE_INT align;
9734
9735 if (memory_access_type == VMAT_GATHER_SCATTER
9736 && gs_info.ifn != IFN_LAST)
9737 {
9738 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9739 vec_offset = vec_offsets[vec_num * j + i];
9740 tree zero = build_zero_cst (vectype);
9741 tree scale = size_int (gs_info.scale);
9742 gcall *call;
9743 if (final_mask)
9744 call = gimple_build_call_internal
9745 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9746 vec_offset, scale, zero, final_mask);
9747 else
9748 call = gimple_build_call_internal
9749 (IFN_GATHER_LOAD, 4, dataref_ptr,
9750 vec_offset, scale, zero);
9751 gimple_call_set_nothrow (call, true);
9752 new_stmt = call;
9753 data_ref = NULL_TREE;
9754 break;
9755 }
9756 else if (memory_access_type == VMAT_GATHER_SCATTER)
9757 {
9758 /* Emulated gather-scatter. */
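/* A sketch of what the loop below emits, per lane K (element count of
   four is just an example):
     idx_K = BIT_FIELD_REF <vec_offset, bits (idx_type), K * bits (idx_type)>;
     off_K = (sizetype) idx_K * scale;
     elt_K = MEM <scalar type> [(ref_type *) (dataref_ptr + off_K)];
   and the elements are then combined into the vector result with a
   CONSTRUCTOR.  */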
9759 gcc_assert (!final_mask);
9760 unsigned HOST_WIDE_INT const_nunits
9761 = nunits.to_constant ();
9762 unsigned HOST_WIDE_INT const_offset_nunits
9763 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9764 .to_constant ();
9765 vec<constructor_elt, va_gc> *ctor_elts;
9766 vec_alloc (ctor_elts, const_nunits);
9767 gimple_seq stmts = NULL;
9768 /* We support offset vectors with more elements
9769 than the data vector for now. */
9770 unsigned HOST_WIDE_INT factor
9771 = const_offset_nunits / const_nunits;
9772 vec_offset = vec_offsets[j / factor];
9773 unsigned elt_offset = (j % factor) * const_nunits;
9774 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9775 tree scale = size_int (gs_info.scale);
9776 align
9777 = get_object_alignment (DR_REF (first_dr_info->dr));
9778 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9779 align);
9780 for (unsigned k = 0; k < const_nunits; ++k)
9781 {
9782 tree boff = size_binop (MULT_EXPR,
9783 TYPE_SIZE (idx_type),
9784 bitsize_int
9785 (k + elt_offset));
9786 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9787 idx_type, vec_offset,
9788 TYPE_SIZE (idx_type),
9789 boff);
9790 idx = gimple_convert (&stmts, sizetype, idx);
9791 idx = gimple_build (&stmts, MULT_EXPR,
9792 sizetype, idx, scale);
9793 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9794 TREE_TYPE (dataref_ptr),
9795 dataref_ptr, idx);
9796 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9797 tree elt = make_ssa_name (TREE_TYPE (vectype));
9798 tree ref = build2 (MEM_REF, ltype, ptr,
9799 build_int_cst (ref_type, 0));
9800 new_stmt = gimple_build_assign (elt, ref);
9801 gimple_set_vuse (new_stmt,
9802 gimple_vuse (gsi_stmt (*gsi)));
9803 gimple_seq_add_stmt (&stmts, new_stmt);
9804 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9805 }
9806 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9807 new_stmt = gimple_build_assign (NULL_TREE,
9808 build_constructor
9809 (vectype, ctor_elts));
9810 data_ref = NULL_TREE;
9811 break;
9812 }
9813
9814 align =
9815 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9816 if (alignment_support_scheme == dr_aligned)
9817 misalign = 0;
9818 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9819 {
9820 align = dr_alignment
9821 (vect_dr_behavior (vinfo, first_dr_info));
9822 misalign = 0;
9823 }
9824 else
9825 misalign = misalignment;
9826 if (dataref_offset == NULL_TREE
9827 && TREE_CODE (dataref_ptr) == SSA_NAME)
9828 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9829 align, misalign);
9830 align = least_bit_hwi (misalign | align);
9831
9832 if (final_mask)
9833 {
9834 tree ptr = build_int_cst (ref_type,
9835 align * BITS_PER_UNIT);
9836 gcall *call
9837 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9838 dataref_ptr, ptr,
9839 final_mask);
9840 gimple_call_set_nothrow (call, true);
9841 new_stmt = call;
9842 data_ref = NULL_TREE;
9843 }
9844 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9845 {
9846 tree final_len
9847 = vect_get_loop_len (loop_vinfo, loop_lens,
9848 vec_num * ncopies,
9849 vec_num * j + i);
9850 tree ptr = build_int_cst (ref_type,
9851 align * BITS_PER_UNIT);
9852
9853 machine_mode vmode = TYPE_MODE (vectype);
9854 opt_machine_mode new_ovmode
9855 = get_len_load_store_mode (vmode, true);
9856 machine_mode new_vmode = new_ovmode.require ();
9857 tree qi_type = unsigned_intQI_type_node;
9858
9859 signed char biasval =
9860 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9861
9862 tree bias = build_int_cst (intQI_type_node, biasval);
9863
9864 gcall *call
9865 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9866 dataref_ptr, ptr,
9867 final_len, bias);
9868 gimple_call_set_nothrow (call, true);
9869 new_stmt = call;
9870 data_ref = NULL_TREE;
9871
9872 /* Need conversion if it's wrapped with VnQI. */
9873 if (vmode != new_vmode)
9874 {
9875 tree new_vtype
9876 = build_vector_type_for_mode (qi_type, new_vmode);
9877 tree var = vect_get_new_ssa_name (new_vtype,
9878 vect_simple_var);
9879 gimple_set_lhs (call, var);
9880 vect_finish_stmt_generation (vinfo, stmt_info, call,
9881 gsi);
9882 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9883 new_stmt
9884 = gimple_build_assign (vec_dest,
9885 VIEW_CONVERT_EXPR, op);
9886 }
9887 }
9888 else
9889 {
9890 tree ltype = vectype;
9891 tree new_vtype = NULL_TREE;
9892 unsigned HOST_WIDE_INT gap
9893 = DR_GROUP_GAP (first_stmt_info);
9894 unsigned int vect_align
9895 = vect_known_alignment_in_bytes (first_dr_info,
9896 vectype);
9897 unsigned int scalar_dr_size
9898 = vect_get_scalar_dr_size (first_dr_info);
9899 /* If there's no peeling for gaps but we have a gap
9900 with slp loads then load the lower half of the
9901 vector only. See get_group_load_store_type for
9902 when we apply this optimization. */
9903 if (slp
9904 && loop_vinfo
9905 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9906 && gap != 0
9907 && known_eq (nunits, (group_size - gap) * 2)
9908 && known_eq (nunits, group_size)
9909 && gap >= (vect_align / scalar_dr_size))
9910 {
9911 tree half_vtype;
9912 new_vtype
9913 = vector_vector_composition_type (vectype, 2,
9914 &half_vtype);
9915 if (new_vtype != NULL_TREE)
9916 ltype = half_vtype;
9917 }
9918 tree offset
9919 = (dataref_offset ? dataref_offset
9920 : build_int_cst (ref_type, 0));
9921 if (ltype != vectype
9922 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9923 {
9924 unsigned HOST_WIDE_INT gap_offset
9925 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9926 tree gapcst = build_int_cst (ref_type, gap_offset);
9927 offset = size_binop (PLUS_EXPR, offset, gapcst);
9928 }
9929 data_ref
9930 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9931 if (alignment_support_scheme == dr_aligned)
9932 ;
9933 else
9934 TREE_TYPE (data_ref)
9935 = build_aligned_type (TREE_TYPE (data_ref),
9936 align * BITS_PER_UNIT);
9937 if (ltype != vectype)
9938 {
9939 vect_copy_ref_info (data_ref,
9940 DR_REF (first_dr_info->dr));
9941 tree tem = make_ssa_name (ltype);
9942 new_stmt = gimple_build_assign (tem, data_ref);
9943 vect_finish_stmt_generation (vinfo, stmt_info,
9944 new_stmt, gsi);
9945 data_ref = NULL;
9946 vec<constructor_elt, va_gc> *v;
9947 vec_alloc (v, 2);
9948 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9949 {
9950 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9951 build_zero_cst (ltype));
9952 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9953 }
9954 else
9955 {
9956 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9957 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9958 build_zero_cst (ltype));
9959 }
9960 gcc_assert (new_vtype != NULL_TREE);
9961 if (new_vtype == vectype)
9962 new_stmt = gimple_build_assign (
9963 vec_dest, build_constructor (vectype, v));
9964 else
9965 {
9966 tree new_vname = make_ssa_name (new_vtype);
9967 new_stmt = gimple_build_assign (
9968 new_vname, build_constructor (new_vtype, v));
9969 vect_finish_stmt_generation (vinfo, stmt_info,
9970 new_stmt, gsi);
9971 new_stmt = gimple_build_assign (
9972 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9973 new_vname));
9974 }
9975 }
9976 }
9977 break;
9978 }
9979 case dr_explicit_realign:
9980 {
9981 tree ptr, bump;
9982
9983 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9984
9985 if (compute_in_loop)
9986 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9987 &realignment_token,
9988 dr_explicit_realign,
9989 dataref_ptr, NULL);
9990
9991 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9992 ptr = copy_ssa_name (dataref_ptr);
9993 else
9994 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9995 // For explicit realign the target alignment should be
9996 // known at compile time.
9997 unsigned HOST_WIDE_INT align =
9998 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9999 new_stmt = gimple_build_assign
10000 (ptr, BIT_AND_EXPR, dataref_ptr,
10001 build_int_cst
10002 (TREE_TYPE (dataref_ptr),
10003 -(HOST_WIDE_INT) align));
10004 vect_finish_stmt_generation (vinfo, stmt_info,
10005 new_stmt, gsi);
10006 data_ref
10007 = build2 (MEM_REF, vectype, ptr,
10008 build_int_cst (ref_type, 0));
10009 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10010 vec_dest = vect_create_destination_var (scalar_dest,
10011 vectype);
10012 new_stmt = gimple_build_assign (vec_dest, data_ref);
10013 new_temp = make_ssa_name (vec_dest, new_stmt);
10014 gimple_assign_set_lhs (new_stmt, new_temp);
10015 gimple_move_vops (new_stmt, stmt_info->stmt);
10016 vect_finish_stmt_generation (vinfo, stmt_info,
10017 new_stmt, gsi);
10018 msq = new_temp;
10019
10020 bump = size_binop (MULT_EXPR, vs,
10021 TYPE_SIZE_UNIT (elem_type));
10022 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10023 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10024 stmt_info, bump);
10025 new_stmt = gimple_build_assign
10026 (NULL_TREE, BIT_AND_EXPR, ptr,
10027 build_int_cst
10028 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10029 if (TREE_CODE (ptr) == SSA_NAME)
10030 ptr = copy_ssa_name (ptr, new_stmt);
10031 else
10032 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10033 gimple_assign_set_lhs (new_stmt, ptr);
10034 vect_finish_stmt_generation (vinfo, stmt_info,
10035 new_stmt, gsi);
10036 data_ref
10037 = build2 (MEM_REF, vectype, ptr,
10038 build_int_cst (ref_type, 0));
10039 break;
10040 }
10041 case dr_explicit_realign_optimized:
10042 {
10043 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10044 new_temp = copy_ssa_name (dataref_ptr);
10045 else
10046 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10047 // We should only be doing this if we know the target
10048 // alignment at compile time.
10049 unsigned HOST_WIDE_INT align =
10050 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10051 new_stmt = gimple_build_assign
10052 (new_temp, BIT_AND_EXPR, dataref_ptr,
10053 build_int_cst (TREE_TYPE (dataref_ptr),
10054 -(HOST_WIDE_INT) align));
10055 vect_finish_stmt_generation (vinfo, stmt_info,
10056 new_stmt, gsi);
10057 data_ref
10058 = build2 (MEM_REF, vectype, new_temp,
10059 build_int_cst (ref_type, 0));
10060 break;
10061 }
10062 default:
10063 gcc_unreachable ();
10064 }
10065 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10066 /* DATA_REF is null if we've already built the statement. */
10067 if (data_ref)
10068 {
10069 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10070 new_stmt = gimple_build_assign (vec_dest, data_ref);
10071 }
10072 new_temp = make_ssa_name (vec_dest, new_stmt);
10073 gimple_set_lhs (new_stmt, new_temp);
10074 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10075
10076 /* 3. Handle explicit realignment if necessary/supported.
10077 Create in loop:
10078 vec_dest = realign_load (msq, lsq, realignment_token) */
10079 if (alignment_support_scheme == dr_explicit_realign_optimized
10080 || alignment_support_scheme == dr_explicit_realign)
10081 {
10082 lsq = gimple_assign_lhs (new_stmt);
10083 if (!realignment_token)
10084 realignment_token = dataref_ptr;
10085 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10086 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10087 msq, lsq, realignment_token);
10088 new_temp = make_ssa_name (vec_dest, new_stmt);
10089 gimple_assign_set_lhs (new_stmt, new_temp);
10090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10091
10092 if (alignment_support_scheme == dr_explicit_realign_optimized)
10093 {
10094 gcc_assert (phi);
10095 if (i == vec_num - 1 && j == ncopies - 1)
10096 add_phi_arg (phi, lsq,
10097 loop_latch_edge (containing_loop),
10098 UNKNOWN_LOCATION);
10099 msq = lsq;
10100 }
10101 }
10102
10103 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10104 {
10105 tree perm_mask = perm_mask_for_reverse (vectype);
10106 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10107 perm_mask, stmt_info, gsi);
10108 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10109 }
10110
10111 /* Collect vector loads and later create their permutation in
10112 vect_transform_grouped_load (). */
10113 if (grouped_load || slp_perm)
10114 dr_chain.quick_push (new_temp);
10115
10116 /* Store vector loads in the corresponding SLP_NODE. */
10117 if (slp && !slp_perm)
10118 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10119
10120 /* With an SLP permutation we load the gaps as well; without
10121 one we need to skip the gaps after we manage to fully load
10122 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
10123 group_elt += nunits;
10124 if (maybe_ne (group_gap_adj, 0U)
10125 && !slp_perm
10126 && known_eq (group_elt, group_size - group_gap_adj))
10127 {
10128 poly_wide_int bump_val
10129 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10130 * group_gap_adj);
10131 if (tree_int_cst_sgn
10132 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10133 bump_val = -bump_val;
10134 tree bump = wide_int_to_tree (sizetype, bump_val);
10135 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10136 gsi, stmt_info, bump);
10137 group_elt = 0;
10138 }
10139 }
10140 /* Bump the vector pointer to account for a gap or for excess
10141 elements loaded for a permuted SLP load. */
10142 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10143 {
10144 poly_wide_int bump_val
10145 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10146 * group_gap_adj);
10147 if (tree_int_cst_sgn
10148 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10149 bump_val = -bump_val;
10150 tree bump = wide_int_to_tree (sizetype, bump_val);
10151 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10152 stmt_info, bump);
10153 }
10154 }
10155
10156 if (slp && !slp_perm)
10157 continue;
10158
10159 if (slp_perm)
10160 {
10161 unsigned n_perms;
10162 /* For SLP we know we've seen all possible uses of dr_chain so
10163 direct vect_transform_slp_perm_load to DCE the unused parts.
10164 ??? This is a hack to prevent compile-time issues as seen
10165 in PR101120 and friends. */
10166 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10167 gsi, vf, false, &n_perms,
10168 nullptr, true);
10169 gcc_assert (ok);
10170 }
10171 else
10172 {
10173 if (grouped_load)
10174 {
10175 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10176 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10177 group_size, gsi);
10178 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10179 }
10180 else
10181 {
10182 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10183 }
10184 }
10185 dr_chain.release ();
10186 }
10187 if (!slp)
10188 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10189
10190 return true;
10191 }
10192
10193 /* Function vect_is_simple_cond.
10194
10195 Input:
10196 LOOP - the loop that is being vectorized.
10197 COND - Condition that is checked for simple use.
10198
10199 Output:
10200 *COMP_VECTYPE - the vector type for the comparison.
10201 *DTS - The def types for the arguments of the comparison.
10202
10203 Returns whether a COND can be vectorized. Checks whether the
10204 condition operands are supportable using vect_is_simple_use. */
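/* For example, COND may be a boolean SSA_NAME used directly as a mask,
or a comparison such as a_1 < b_2 between SSA_NAMEs and/or constants.  */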
10205
10206 static bool
10207 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10208 slp_tree slp_node, tree *comp_vectype,
10209 enum vect_def_type *dts, tree vectype)
10210 {
10211 tree lhs, rhs;
10212 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10213 slp_tree slp_op;
10214
10215 /* Mask case. */
10216 if (TREE_CODE (cond) == SSA_NAME
10217 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10218 {
10219 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10220 &slp_op, &dts[0], comp_vectype)
10221 || !*comp_vectype
10222 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10223 return false;
10224 return true;
10225 }
10226
10227 if (!COMPARISON_CLASS_P (cond))
10228 return false;
10229
10230 lhs = TREE_OPERAND (cond, 0);
10231 rhs = TREE_OPERAND (cond, 1);
10232
10233 if (TREE_CODE (lhs) == SSA_NAME)
10234 {
10235 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10236 &lhs, &slp_op, &dts[0], &vectype1))
10237 return false;
10238 }
10239 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10240 || TREE_CODE (lhs) == FIXED_CST)
10241 dts[0] = vect_constant_def;
10242 else
10243 return false;
10244
10245 if (TREE_CODE (rhs) == SSA_NAME)
10246 {
10247 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10248 &rhs, &slp_op, &dts[1], &vectype2))
10249 return false;
10250 }
10251 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10252 || TREE_CODE (rhs) == FIXED_CST)
10253 dts[1] = vect_constant_def;
10254 else
10255 return false;
10256
10257 if (vectype1 && vectype2
10258 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10259 TYPE_VECTOR_SUBPARTS (vectype2)))
10260 return false;
10261
10262 *comp_vectype = vectype1 ? vectype1 : vectype2;
10263 /* Invariant comparison. */
10264 if (! *comp_vectype)
10265 {
10266 tree scalar_type = TREE_TYPE (lhs);
10267 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10268 *comp_vectype = truth_type_for (vectype);
10269 else
10270 {
10271 /* If we can widen the comparison to match vectype do so. */
10272 if (INTEGRAL_TYPE_P (scalar_type)
10273 && !slp_node
10274 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10275 TYPE_SIZE (TREE_TYPE (vectype))))
10276 scalar_type = build_nonstandard_integer_type
10277 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10278 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10279 slp_node);
10280 }
10281 }
10282
10283 return true;
10284 }
10285
10286 /* vectorizable_condition.
10287
10288 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10289 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10290 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10291 at GSI.
10292
10293 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10294
10295 Return true if STMT_INFO is vectorizable in this way. */
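
As an illustrative sketch, a scalar statement

x_1 = a_2 < b_3 ? c_4 : d_5

is typically vectorized as

vect_mask = vect_a < vect_b;
vect_x = VEC_COND_EXPR <vect_mask, vect_c, vect_d>;  */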
10296
10297 static bool
10298 vectorizable_condition (vec_info *vinfo,
10299 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10300 gimple **vec_stmt,
10301 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10302 {
10303 tree scalar_dest = NULL_TREE;
10304 tree vec_dest = NULL_TREE;
10305 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10306 tree then_clause, else_clause;
10307 tree comp_vectype = NULL_TREE;
10308 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10309 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10310 tree vec_compare;
10311 tree new_temp;
10312 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10313 enum vect_def_type dts[4]
10314 = {vect_unknown_def_type, vect_unknown_def_type,
10315 vect_unknown_def_type, vect_unknown_def_type};
10316 int ndts = 4;
10317 int ncopies;
10318 int vec_num;
10319 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10320 int i;
10321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10322 vec<tree> vec_oprnds0 = vNULL;
10323 vec<tree> vec_oprnds1 = vNULL;
10324 vec<tree> vec_oprnds2 = vNULL;
10325 vec<tree> vec_oprnds3 = vNULL;
10326 tree vec_cmp_type;
10327 bool masked = false;
10328
10329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10330 return false;
10331
10332 /* Is this a vectorizable conditional operation? */
10333 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10334 if (!stmt)
10335 return false;
10336
10337 code = gimple_assign_rhs_code (stmt);
10338 if (code != COND_EXPR)
10339 return false;
10340
10341 stmt_vec_info reduc_info = NULL;
10342 int reduc_index = -1;
10343 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10344 bool for_reduction
10345 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10346 if (for_reduction)
10347 {
10348 if (STMT_SLP_TYPE (stmt_info))
10349 return false;
10350 reduc_info = info_for_reduction (vinfo, stmt_info);
10351 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10352 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10353 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10354 || reduc_index != -1);
10355 }
10356 else
10357 {
10358 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10359 return false;
10360 }
10361
10362 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10363 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10364
10365 if (slp_node)
10366 {
10367 ncopies = 1;
10368 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10369 }
10370 else
10371 {
10372 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10373 vec_num = 1;
10374 }
10375
10376 gcc_assert (ncopies >= 1);
10377 if (for_reduction && ncopies > 1)
10378 return false; /* FORNOW */
10379
10380 cond_expr = gimple_assign_rhs1 (stmt);
10381
10382 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10383 &comp_vectype, &dts[0], vectype)
10384 || !comp_vectype)
10385 return false;
10386
10387 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10388 slp_tree then_slp_node, else_slp_node;
10389 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10390 &then_clause, &then_slp_node, &dts[2], &vectype1))
10391 return false;
10392 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10393 &else_clause, &else_slp_node, &dts[3], &vectype2))
10394 return false;
10395
10396 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10397 return false;
10398
10399 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10400 return false;
10401
10402 masked = !COMPARISON_CLASS_P (cond_expr);
10403 vec_cmp_type = truth_type_for (comp_vectype);
10404
10405 if (vec_cmp_type == NULL_TREE)
10406 return false;
10407
10408 cond_code = TREE_CODE (cond_expr);
10409 if (!masked)
10410 {
10411 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10412 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10413 }
10414
10415 /* For conditional reductions, the "then" value needs to be the candidate
10416 value calculated by this iteration while the "else" value needs to be
10417 the result carried over from previous iterations. If the COND_EXPR
10418 is the other way around, we need to swap it. */
10419 bool must_invert_cmp_result = false;
10420 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10421 {
10422 if (masked)
10423 must_invert_cmp_result = true;
10424 else
10425 {
10426 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10427 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10428 if (new_code == ERROR_MARK)
10429 must_invert_cmp_result = true;
10430 else
10431 {
10432 cond_code = new_code;
10433 /* Make sure we don't accidentally use the old condition. */
10434 cond_expr = NULL_TREE;
10435 }
10436 }
10437 std::swap (then_clause, else_clause);
10438 }
10439
10440 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10441 {
10442 /* Boolean values may have another representation in vectors
10443 and therefore we prefer bit operations over comparison for
10444 them (which also works for scalar masks). We store opcodes
10445 to use in bitop1 and bitop2. Statement is vectorized as
10446 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10447 depending on bitop1 and bitop2 arity. */
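/* For example, with boolean operands rhs1 > rhs2 becomes
rhs1 & ~rhs2 and rhs1 >= rhs2 becomes rhs1 | ~rhs2.  */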
10448 switch (cond_code)
10449 {
10450 case GT_EXPR:
10451 bitop1 = BIT_NOT_EXPR;
10452 bitop2 = BIT_AND_EXPR;
10453 break;
10454 case GE_EXPR:
10455 bitop1 = BIT_NOT_EXPR;
10456 bitop2 = BIT_IOR_EXPR;
10457 break;
10458 case LT_EXPR:
10459 bitop1 = BIT_NOT_EXPR;
10460 bitop2 = BIT_AND_EXPR;
10461 std::swap (cond_expr0, cond_expr1);
10462 break;
10463 case LE_EXPR:
10464 bitop1 = BIT_NOT_EXPR;
10465 bitop2 = BIT_IOR_EXPR;
10466 std::swap (cond_expr0, cond_expr1);
10467 break;
10468 case NE_EXPR:
10469 bitop1 = BIT_XOR_EXPR;
10470 break;
10471 case EQ_EXPR:
10472 bitop1 = BIT_XOR_EXPR;
10473 bitop2 = BIT_NOT_EXPR;
10474 break;
10475 default:
10476 return false;
10477 }
10478 cond_code = SSA_NAME;
10479 }
10480
10481 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10482 && reduction_type == EXTRACT_LAST_REDUCTION
10483 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10484 {
10485 if (dump_enabled_p ())
10486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10487 "reduction comparison operation not supported.\n");
10488 return false;
10489 }
10490
10491 if (!vec_stmt)
10492 {
10493 if (bitop1 != NOP_EXPR)
10494 {
10495 machine_mode mode = TYPE_MODE (comp_vectype);
10496 optab optab;
10497
10498 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10499 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10500 return false;
10501
10502 if (bitop2 != NOP_EXPR)
10503 {
10504 optab = optab_for_tree_code (bitop2, comp_vectype,
10505 optab_default);
10506 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10507 return false;
10508 }
10509 }
10510
10511 vect_cost_for_stmt kind = vector_stmt;
10512 if (reduction_type == EXTRACT_LAST_REDUCTION)
10513 /* Count one reduction-like operation per vector. */
10514 kind = vec_to_scalar;
10515 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10516 return false;
10517
10518 if (slp_node
10519 && (!vect_maybe_update_slp_op_vectype
10520 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10521 || (op_adjust == 1
10522 && !vect_maybe_update_slp_op_vectype
10523 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10524 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10525 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10526 {
10527 if (dump_enabled_p ())
10528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10529 "incompatible vector types for invariants\n");
10530 return false;
10531 }
10532
10533 if (loop_vinfo && for_reduction
10534 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10535 {
10536 if (reduction_type == EXTRACT_LAST_REDUCTION)
10537 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10538 ncopies * vec_num, vectype, NULL);
10539 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10540 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10541 {
10542 if (dump_enabled_p ())
10543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10544 "conditional reduction prevents the use"
10545 " of partial vectors.\n");
10546 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10547 }
10548 }
10549
10550 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10551 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10552 cost_vec, kind);
10553 return true;
10554 }
10555
10556 /* Transform. */
10557
10558 /* Handle def. */
10559 scalar_dest = gimple_assign_lhs (stmt);
10560 if (reduction_type != EXTRACT_LAST_REDUCTION)
10561 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10562
10563 bool swap_cond_operands = false;
10564
10565 /* See whether another part of the vectorized code applies a loop
10566 mask to the condition, or to its inverse. */
10567
10568 vec_loop_masks *masks = NULL;
10569 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10570 {
10571 if (reduction_type == EXTRACT_LAST_REDUCTION)
10572 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10573 else
10574 {
10575 scalar_cond_masked_key cond (cond_expr, ncopies);
10576 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10577 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10578 else
10579 {
10580 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10581 tree_code orig_code = cond.code;
10582 cond.code = invert_tree_comparison (cond.code, honor_nans);
10583 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10584 {
10585 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10586 cond_code = cond.code;
10587 swap_cond_operands = true;
10588 }
10589 else
10590 {
10591 /* Try the inverse of the current mask. We check if the
10592 inverse mask is live and if so we generate a negate of
10593 the current mask such that we still honor NaNs. */
10594 cond.inverted_p = true;
10595 cond.code = orig_code;
10596 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10597 {
10598 bitop1 = orig_code;
10599 bitop2 = BIT_NOT_EXPR;
10600 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10601 cond_code = cond.code;
10602 swap_cond_operands = true;
10603 }
10604 }
10605 }
10606 }
10607 }
10608
10609 /* Handle cond expr. */
10610 if (masked)
10611 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10612 cond_expr, &vec_oprnds0, comp_vectype,
10613 then_clause, &vec_oprnds2, vectype,
10614 reduction_type != EXTRACT_LAST_REDUCTION
10615 ? else_clause : NULL, &vec_oprnds3, vectype);
10616 else
10617 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10618 cond_expr0, &vec_oprnds0, comp_vectype,
10619 cond_expr1, &vec_oprnds1, comp_vectype,
10620 then_clause, &vec_oprnds2, vectype,
10621 reduction_type != EXTRACT_LAST_REDUCTION
10622 ? else_clause : NULL, &vec_oprnds3, vectype);
10623
10624 /* Arguments are ready. Create the new vector stmt. */
10625 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10626 {
10627 vec_then_clause = vec_oprnds2[i];
10628 if (reduction_type != EXTRACT_LAST_REDUCTION)
10629 vec_else_clause = vec_oprnds3[i];
10630
10631 if (swap_cond_operands)
10632 std::swap (vec_then_clause, vec_else_clause);
10633
10634 if (masked)
10635 vec_compare = vec_cond_lhs;
10636 else
10637 {
10638 vec_cond_rhs = vec_oprnds1[i];
10639 if (bitop1 == NOP_EXPR)
10640 {
10641 gimple_seq stmts = NULL;
10642 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10643 vec_cond_lhs, vec_cond_rhs);
10644 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10645 }
10646 else
10647 {
10648 new_temp = make_ssa_name (vec_cmp_type);
10649 gassign *new_stmt;
10650 if (bitop1 == BIT_NOT_EXPR)
10651 new_stmt = gimple_build_assign (new_temp, bitop1,
10652 vec_cond_rhs);
10653 else
10654 new_stmt
10655 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10656 vec_cond_rhs);
10657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10658 if (bitop2 == NOP_EXPR)
10659 vec_compare = new_temp;
10660 else if (bitop2 == BIT_NOT_EXPR)
10661 {
10662 /* Instead of doing ~x ? y : z do x ? z : y. */
10663 vec_compare = new_temp;
10664 std::swap (vec_then_clause, vec_else_clause);
10665 }
10666 else
10667 {
10668 vec_compare = make_ssa_name (vec_cmp_type);
10669 new_stmt
10670 = gimple_build_assign (vec_compare, bitop2,
10671 vec_cond_lhs, new_temp);
10672 vect_finish_stmt_generation (vinfo, stmt_info,
10673 new_stmt, gsi);
10674 }
10675 }
10676 }
10677
10678 /* If we decided to apply a loop mask to the result of the vector
10679 comparison, AND the comparison with the mask now. Later passes
10680 should then be able to reuse the AND results between multiple
10681 vector statements.
10682
10683 For example:
10684 for (int i = 0; i < 100; ++i)
10685 x[i] = y[i] ? z[i] : 10;
10686
10687 results in following optimized GIMPLE:
10688
10689 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10690 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10691 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10692 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10693 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10694 vect_iftmp.11_47, { 10, ... }>;
10695
10696 instead of using masked and unmasked forms of
10697 vec != { 0, ... } (masked in the MASK_LOAD,
10698 unmasked in the VEC_COND_EXPR). */
10699
10700 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10701 in cases where that's necessary. */
10702
10703 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10704 {
10705 if (!is_gimple_val (vec_compare))
10706 {
10707 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10708 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10709 vec_compare);
10710 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10711 vec_compare = vec_compare_name;
10712 }
10713
10714 if (must_invert_cmp_result)
10715 {
10716 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10717 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10718 BIT_NOT_EXPR,
10719 vec_compare);
10720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10721 vec_compare = vec_compare_name;
10722 }
10723
10724 if (masks)
10725 {
10726 tree loop_mask
10727 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10728 vectype, i);
10729 tree tmp2 = make_ssa_name (vec_cmp_type);
10730 gassign *g
10731 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10732 loop_mask);
10733 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10734 vec_compare = tmp2;
10735 }
10736 }
10737
10738 gimple *new_stmt;
10739 if (reduction_type == EXTRACT_LAST_REDUCTION)
10740 {
10741 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10742 tree lhs = gimple_get_lhs (old_stmt);
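/* .FOLD_EXTRACT_LAST (else, mask, then) yields the last element of
THEN for which the corresponding MASK bit is set, or ELSE when no
MASK bit is set.  */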
10743 new_stmt = gimple_build_call_internal
10744 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10745 vec_then_clause);
10746 gimple_call_set_lhs (new_stmt, lhs);
10747 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10748 if (old_stmt == gsi_stmt (*gsi))
10749 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10750 else
10751 {
10752 /* In this case we're moving the definition to later in the
10753 block. That doesn't matter because the only uses of the
10754 lhs are in phi statements. */
10755 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10756 gsi_remove (&old_gsi, true);
10757 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10758 }
10759 }
10760 else
10761 {
10762 new_temp = make_ssa_name (vec_dest);
10763 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10764 vec_then_clause, vec_else_clause);
10765 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10766 }
10767 if (slp_node)
10768 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10769 else
10770 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10771 }
10772
10773 if (!slp_node)
10774 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10775
10776 vec_oprnds0.release ();
10777 vec_oprnds1.release ();
10778 vec_oprnds2.release ();
10779 vec_oprnds3.release ();
10780
10781 return true;
10782 }
10783
10784 /* vectorizable_comparison.
10785
10786 Check if STMT_INFO is a comparison expression that can be vectorized.
10787 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10788 comparison, put it in VEC_STMT, and insert it at GSI.
10789
10790 Return true if STMT_INFO is vectorizable in this way. */
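
As an illustrative sketch, a scalar mask definition

mask_1 = a_2 < b_3

is typically vectorized as a vector comparison

vect_mask = vect_a < vect_b;

whose result has a vector boolean (mask) type.  */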
10791
10792 static bool
10793 vectorizable_comparison (vec_info *vinfo,
10794 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10795 gimple **vec_stmt,
10796 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10797 {
10798 tree lhs, rhs1, rhs2;
10799 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10800 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10801 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10802 tree new_temp;
10803 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10804 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10805 int ndts = 2;
10806 poly_uint64 nunits;
10807 int ncopies;
10808 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10809 int i;
10810 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10811 vec<tree> vec_oprnds0 = vNULL;
10812 vec<tree> vec_oprnds1 = vNULL;
10813 tree mask_type;
10814 tree mask;
10815
10816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10817 return false;
10818
10819 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10820 return false;
10821
10822 mask_type = vectype;
10823 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10824
10825 if (slp_node)
10826 ncopies = 1;
10827 else
10828 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10829
10830 gcc_assert (ncopies >= 1);
10831 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10832 return false;
10833
10834 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10835 if (!stmt)
10836 return false;
10837
10838 code = gimple_assign_rhs_code (stmt);
10839
10840 if (TREE_CODE_CLASS (code) != tcc_comparison)
10841 return false;
10842
10843 slp_tree slp_rhs1, slp_rhs2;
10844 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10845 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10846 return false;
10847
10848 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10849 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10850 return false;
10851
10852 if (vectype1 && vectype2
10853 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10854 TYPE_VECTOR_SUBPARTS (vectype2)))
10855 return false;
10856
10857 vectype = vectype1 ? vectype1 : vectype2;
10858
10859 /* Invariant comparison. */
10860 if (!vectype)
10861 {
10862 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10863 vectype = mask_type;
10864 else
10865 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10866 slp_node);
10867 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10868 return false;
10869 }
10870 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10871 return false;
10872
10873 /* Can't compare mask and non-mask types. */
10874 if (vectype1 && vectype2
10875 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10876 return false;
10877
10878 /* Boolean values may have another representation in vectors
10879 and therefore we prefer bit operations over comparison for
10880 them (which also works for scalar masks). We store opcodes
10881 to use in bitop1 and bitop2. Statement is vectorized as
10882 BITOP2 (rhs1 BITOP1 rhs2) or
10883 rhs1 BITOP2 (BITOP1 rhs2)
10884 depending on bitop1 and bitop2 arity. */
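/* For example, with boolean operands rhs1 >= rhs2 becomes
rhs1 | ~rhs2, while rhs1 < rhs2 swaps the operands and
computes rhs2 & ~rhs1.  */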
10885 bool swap_p = false;
10886 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10887 {
10888 if (code == GT_EXPR)
10889 {
10890 bitop1 = BIT_NOT_EXPR;
10891 bitop2 = BIT_AND_EXPR;
10892 }
10893 else if (code == GE_EXPR)
10894 {
10895 bitop1 = BIT_NOT_EXPR;
10896 bitop2 = BIT_IOR_EXPR;
10897 }
10898 else if (code == LT_EXPR)
10899 {
10900 bitop1 = BIT_NOT_EXPR;
10901 bitop2 = BIT_AND_EXPR;
10902 swap_p = true;
10903 }
10904 else if (code == LE_EXPR)
10905 {
10906 bitop1 = BIT_NOT_EXPR;
10907 bitop2 = BIT_IOR_EXPR;
10908 swap_p = true;
10909 }
10910 else
10911 {
10912 bitop1 = BIT_XOR_EXPR;
10913 if (code == EQ_EXPR)
10914 bitop2 = BIT_NOT_EXPR;
10915 }
10916 }
10917
10918 if (!vec_stmt)
10919 {
10920 if (bitop1 == NOP_EXPR)
10921 {
10922 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10923 return false;
10924 }
10925 else
10926 {
10927 machine_mode mode = TYPE_MODE (vectype);
10928 optab optab;
10929
10930 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10931 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10932 return false;
10933
10934 if (bitop2 != NOP_EXPR)
10935 {
10936 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10937 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10938 return false;
10939 }
10940 }
10941
10942 /* Put types on constant and invariant SLP children. */
10943 if (slp_node
10944 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10945 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10946 {
10947 if (dump_enabled_p ())
10948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10949 "incompatible vector types for invariants\n");
10950 return false;
10951 }
10952
10953 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10954 vect_model_simple_cost (vinfo, stmt_info,
10955 ncopies * (1 + (bitop2 != NOP_EXPR)),
10956 dts, ndts, slp_node, cost_vec);
10957 return true;
10958 }
10959
10960 /* Transform. */
10961
10962 /* Handle def. */
10963 lhs = gimple_assign_lhs (stmt);
10964 mask = vect_create_destination_var (lhs, mask_type);
10965
10966 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10967 rhs1, &vec_oprnds0, vectype,
10968 rhs2, &vec_oprnds1, vectype);
10969 if (swap_p)
10970 std::swap (vec_oprnds0, vec_oprnds1);
10971
10972 /* Arguments are ready. Create the new vector stmt. */
10973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10974 {
10975 gimple *new_stmt;
10976 vec_rhs2 = vec_oprnds1[i];
10977
10978 new_temp = make_ssa_name (mask);
10979 if (bitop1 == NOP_EXPR)
10980 {
10981 new_stmt = gimple_build_assign (new_temp, code,
10982 vec_rhs1, vec_rhs2);
10983 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10984 }
10985 else
10986 {
10987 if (bitop1 == BIT_NOT_EXPR)
10988 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10989 else
10990 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10991 vec_rhs2);
10992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10993 if (bitop2 != NOP_EXPR)
10994 {
10995 tree res = make_ssa_name (mask);
10996 if (bitop2 == BIT_NOT_EXPR)
10997 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10998 else
10999 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11000 new_temp);
11001 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11002 }
11003 }
11004 if (slp_node)
11005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11006 else
11007 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11008 }
11009
11010 if (!slp_node)
11011 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11012
11013 vec_oprnds0.release ();
11014 vec_oprnds1.release ();
11015
11016 return true;
11017 }
11018
11019 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11020 can handle all live statements in the node. Otherwise return true
11021 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11022 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11023
11024 static bool
11025 can_vectorize_live_stmts (vec_info *vinfo,
11026 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11027 slp_tree slp_node, slp_instance slp_node_instance,
11028 bool vec_stmt_p,
11029 stmt_vector_for_cost *cost_vec)
11030 {
11031 if (slp_node)
11032 {
11033 stmt_vec_info slp_stmt_info;
11034 unsigned int i;
11035 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11036 {
11037 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11038 && !vectorizable_live_operation (vinfo,
11039 slp_stmt_info, gsi, slp_node,
11040 slp_node_instance, i,
11041 vec_stmt_p, cost_vec))
11042 return false;
11043 }
11044 }
11045 else if (STMT_VINFO_LIVE_P (stmt_info)
11046 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11047 slp_node, slp_node_instance, -1,
11048 vec_stmt_p, cost_vec))
11049 return false;
11050
11051 return true;
11052 }
11053
11054 /* Make sure the statement is vectorizable. */
11055
11056 opt_result
11057 vect_analyze_stmt (vec_info *vinfo,
11058 stmt_vec_info stmt_info, bool *need_to_vectorize,
11059 slp_tree node, slp_instance node_instance,
11060 stmt_vector_for_cost *cost_vec)
11061 {
11062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11063 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11064 bool ok;
11065 gimple_seq pattern_def_seq;
11066
11067 if (dump_enabled_p ())
11068 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11069 stmt_info->stmt);
11070
11071 if (gimple_has_volatile_ops (stmt_info->stmt))
11072 return opt_result::failure_at (stmt_info->stmt,
11073 "not vectorized:"
11074 " stmt has volatile operands: %G\n",
11075 stmt_info->stmt);
11076
11077 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11078 && node == NULL
11079 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11080 {
11081 gimple_stmt_iterator si;
11082
11083 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11084 {
11085 stmt_vec_info pattern_def_stmt_info
11086 = vinfo->lookup_stmt (gsi_stmt (si));
11087 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11088 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11089 {
11090 /* Analyze def stmt of STMT if it's a pattern stmt. */
11091 if (dump_enabled_p ())
11092 dump_printf_loc (MSG_NOTE, vect_location,
11093 "==> examining pattern def statement: %G",
11094 pattern_def_stmt_info->stmt);
11095
11096 opt_result res
11097 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11098 need_to_vectorize, node, node_instance,
11099 cost_vec);
11100 if (!res)
11101 return res;
11102 }
11103 }
11104 }
11105
11106 /* Skip stmts that do not need to be vectorized. In loops this is expected
11107 to include:
11108 - the COND_EXPR which is the loop exit condition
11109 - any LABEL_EXPRs in the loop
11110 - computations that are used only for array indexing or loop control.
11111 In basic blocks we only analyze statements that are a part of some SLP
11112 instance, therefore, all the statements are relevant.
11113
11114 A pattern statement needs to be analyzed instead of the original statement
11115 if the original statement is not relevant. Otherwise, we analyze both
11116 statements. In basic blocks we are called from some SLP instance
11117 traversal; don't analyze pattern stmts instead of the original ones
11118 there, since the pattern stmts will already be part of an SLP instance. */
11119
11120 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11121 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11122 && !STMT_VINFO_LIVE_P (stmt_info))
11123 {
11124 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11125 && pattern_stmt_info
11126 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11127 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11128 {
11129 /* Analyze PATTERN_STMT instead of the original stmt. */
11130 stmt_info = pattern_stmt_info;
11131 if (dump_enabled_p ())
11132 dump_printf_loc (MSG_NOTE, vect_location,
11133 "==> examining pattern statement: %G",
11134 stmt_info->stmt);
11135 }
11136 else
11137 {
11138 if (dump_enabled_p ())
11139 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11140
11141 return opt_result::success ();
11142 }
11143 }
11144 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11145 && node == NULL
11146 && pattern_stmt_info
11147 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11148 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11149 {
11150 /* Analyze PATTERN_STMT too. */
11151 if (dump_enabled_p ())
11152 dump_printf_loc (MSG_NOTE, vect_location,
11153 "==> examining pattern statement: %G",
11154 pattern_stmt_info->stmt);
11155
11156 opt_result res
11157 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11158 node_instance, cost_vec);
11159 if (!res)
11160 return res;
11161 }
11162
11163 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11164 {
11165 case vect_internal_def:
11166 break;
11167
11168 case vect_reduction_def:
11169 case vect_nested_cycle:
11170 gcc_assert (!bb_vinfo
11171 && (relevance == vect_used_in_outer
11172 || relevance == vect_used_in_outer_by_reduction
11173 || relevance == vect_used_by_reduction
11174 || relevance == vect_unused_in_scope
11175 || relevance == vect_used_only_live));
11176 break;
11177
11178 case vect_induction_def:
11179 case vect_first_order_recurrence:
11180 gcc_assert (!bb_vinfo);
11181 break;
11182
11183 case vect_constant_def:
11184 case vect_external_def:
11185 case vect_unknown_def_type:
11186 default:
11187 gcc_unreachable ();
11188 }
11189
11190 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11191 if (node)
11192 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11193
11194 if (STMT_VINFO_RELEVANT_P (stmt_info))
11195 {
11196 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11197 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11198 || (call && gimple_call_lhs (call) == NULL_TREE));
11199 *need_to_vectorize = true;
11200 }
11201
11202 if (PURE_SLP_STMT (stmt_info) && !node)
11203 {
11204 if (dump_enabled_p ())
11205 dump_printf_loc (MSG_NOTE, vect_location,
11206 "handled only by SLP analysis\n");
11207 return opt_result::success ();
11208 }
11209
11210 ok = true;
11211 if (!bb_vinfo
11212 && (STMT_VINFO_RELEVANT_P (stmt_info)
11213 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11214 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11215 -mveclibabi= takes preference over library functions with
11216 the simd attribute. */
11217 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11218 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11219 cost_vec)
11220 || vectorizable_conversion (vinfo, stmt_info,
11221 NULL, NULL, node, cost_vec)
11222 || vectorizable_operation (vinfo, stmt_info,
11223 NULL, NULL, node, cost_vec)
11224 || vectorizable_assignment (vinfo, stmt_info,
11225 NULL, NULL, node, cost_vec)
11226 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11227 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11228 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11229 node, node_instance, cost_vec)
11230 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11231 NULL, node, cost_vec)
11232 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11233 || vectorizable_condition (vinfo, stmt_info,
11234 NULL, NULL, node, cost_vec)
11235 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11236 cost_vec)
11237 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11238 stmt_info, NULL, node)
11239 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11240 stmt_info, NULL, node, cost_vec));
11241 else
11242 {
11243 if (bb_vinfo)
11244 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11245 || vectorizable_simd_clone_call (vinfo, stmt_info,
11246 NULL, NULL, node, cost_vec)
11247 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11248 cost_vec)
11249 || vectorizable_shift (vinfo, stmt_info,
11250 NULL, NULL, node, cost_vec)
11251 || vectorizable_operation (vinfo, stmt_info,
11252 NULL, NULL, node, cost_vec)
11253 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11254 cost_vec)
11255 || vectorizable_load (vinfo, stmt_info,
11256 NULL, NULL, node, cost_vec)
11257 || vectorizable_store (vinfo, stmt_info,
11258 NULL, NULL, node, cost_vec)
11259 || vectorizable_condition (vinfo, stmt_info,
11260 NULL, NULL, node, cost_vec)
11261 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11262 cost_vec)
11263 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11264 }
11265
11266 if (node)
11267 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11268
11269 if (!ok)
11270 return opt_result::failure_at (stmt_info->stmt,
11271 "not vectorized:"
11272 " relevant stmt not supported: %G",
11273 stmt_info->stmt);
11274
11275 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11276 need extra handling, except for vectorizable reductions. */
11277 if (!bb_vinfo
11278 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11279 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11280 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11281 stmt_info, NULL, node, node_instance,
11282 false, cost_vec))
11283 return opt_result::failure_at (stmt_info->stmt,
11284 "not vectorized:"
11285 " live stmt not supported: %G",
11286 stmt_info->stmt);
11287
11288 return opt_result::success ();
11289 }
11290
11291
11292 /* Function vect_transform_stmt.
11293
11294 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11295
11296 bool
11297 vect_transform_stmt (vec_info *vinfo,
11298 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11299 slp_tree slp_node, slp_instance slp_node_instance)
11300 {
11301 bool is_store = false;
11302 gimple *vec_stmt = NULL;
11303 bool done;
11304
11305 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11306
11307 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11308 if (slp_node)
11309 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11310
11311 switch (STMT_VINFO_TYPE (stmt_info))
11312 {
11313 case type_demotion_vec_info_type:
11314 case type_promotion_vec_info_type:
11315 case type_conversion_vec_info_type:
11316 done = vectorizable_conversion (vinfo, stmt_info,
11317 gsi, &vec_stmt, slp_node, NULL);
11318 gcc_assert (done);
11319 break;
11320
11321 case induc_vec_info_type:
11322 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11323 stmt_info, &vec_stmt, slp_node,
11324 NULL);
11325 gcc_assert (done);
11326 break;
11327
11328 case shift_vec_info_type:
11329 done = vectorizable_shift (vinfo, stmt_info,
11330 gsi, &vec_stmt, slp_node, NULL);
11331 gcc_assert (done);
11332 break;
11333
11334 case op_vec_info_type:
11335 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11336 NULL);
11337 gcc_assert (done);
11338 break;
11339
11340 case assignment_vec_info_type:
11341 done = vectorizable_assignment (vinfo, stmt_info,
11342 gsi, &vec_stmt, slp_node, NULL);
11343 gcc_assert (done);
11344 break;
11345
11346 case load_vec_info_type:
11347 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11348 NULL);
11349 gcc_assert (done);
11350 break;
11351
11352 case store_vec_info_type:
11353 done = vectorizable_store (vinfo, stmt_info,
11354 gsi, &vec_stmt, slp_node, NULL);
11355 gcc_assert (done);
11356 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11357 {
11358 /* In case of interleaving, the whole chain is vectorized when the
11359 last store in the chain is reached. Store stmts before the last
11360 one are skipped, and their vec_stmt_info shouldn't be freed
11361 meanwhile. */
11362 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11363 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11364 is_store = true;
11365 }
11366 else
11367 is_store = true;
11368 break;
11369
11370 case condition_vec_info_type:
11371 done = vectorizable_condition (vinfo, stmt_info,
11372 gsi, &vec_stmt, slp_node, NULL);
11373 gcc_assert (done);
11374 break;
11375
11376 case comparison_vec_info_type:
11377 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11378 slp_node, NULL);
11379 gcc_assert (done);
11380 break;
11381
11382 case call_vec_info_type:
11383 done = vectorizable_call (vinfo, stmt_info,
11384 gsi, &vec_stmt, slp_node, NULL);
11385 break;
11386
11387 case call_simd_clone_vec_info_type:
11388 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11389 slp_node, NULL);
11390 break;
11391
11392 case reduc_vec_info_type:
11393 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11394 gsi, &vec_stmt, slp_node);
11395 gcc_assert (done);
11396 break;
11397
11398 case cycle_phi_info_type:
11399 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11400 &vec_stmt, slp_node, slp_node_instance);
11401 gcc_assert (done);
11402 break;
11403
11404 case lc_phi_info_type:
11405 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11406 stmt_info, &vec_stmt, slp_node);
11407 gcc_assert (done);
11408 break;
11409
11410 case recurr_info_type:
11411 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11412 stmt_info, &vec_stmt, slp_node, NULL);
11413 gcc_assert (done);
11414 break;
11415
11416 case phi_info_type:
11417 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11418 gcc_assert (done);
11419 break;
11420
11421 default:
11422 if (!STMT_VINFO_LIVE_P (stmt_info))
11423 {
11424 if (dump_enabled_p ())
11425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11426 "stmt not supported.\n");
11427 gcc_unreachable ();
11428 }
11429 done = true;
11430 }
11431
11432 if (!slp_node && vec_stmt)
11433 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11434
11435 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11436 {
11437 /* Handle stmts whose DEF is used outside the loop-nest that is
11438 being vectorized. */
11439 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11440 slp_node_instance, true, NULL);
11441 gcc_assert (done);
11442 }
11443
11444 if (slp_node)
11445 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11446
11447 return is_store;
11448 }
11449
11450
11451 /* Remove a group of stores (for SLP or interleaving), free their
11452 stmt_vec_info. */
11453
11454 void
11455 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11456 {
11457 stmt_vec_info next_stmt_info = first_stmt_info;
11458
11459 while (next_stmt_info)
11460 {
11461 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11462 next_stmt_info = vect_orig_stmt (next_stmt_info);
11463 /* Free the attached stmt_vec_info and remove the stmt. */
11464 vinfo->remove_stmt (next_stmt_info);
11465 next_stmt_info = tmp;
11466 }
11467 }
11468
11469 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11470 elements of type SCALAR_TYPE, or null if the target doesn't support
11471 such a type.
11472
11473 If NUNITS is zero, return a vector type that contains elements of
11474 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11475
11476 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11477 for this vectorization region and want to "autodetect" the best choice.
11478 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11479 and we want the new type to be interoperable with it. PREVAILING_MODE
11480 in this case can be a scalar integer mode or a vector mode; when it
11481 is a vector mode, the function acts like a tree-level version of
11482 related_vector_mode. */
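
For example, on a target whose preferred vector size is 128 bits,
a 32-bit integer SCALAR_TYPE with NUNITS 4 would typically yield a
4 x int vector type.  */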
11483
11484 tree
11485 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11486 tree scalar_type, poly_uint64 nunits)
11487 {
11488 tree orig_scalar_type = scalar_type;
11489 scalar_mode inner_mode;
11490 machine_mode simd_mode;
11491 tree vectype;
11492
11493 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11494 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11495 return NULL_TREE;
11496
11497 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11498
11499 /* Interoperability between modes requires one to be a constant multiple
11500 of the other, so that the number of vectors required for each operation
11501 is a compile-time constant. */
11502 if (prevailing_mode != VOIDmode
11503 && !constant_multiple_p (nunits * nbytes,
11504 GET_MODE_SIZE (prevailing_mode))
11505 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11506 nunits * nbytes))
11507 return NULL_TREE;
11508
11509 /* For vector types of elements whose mode precision doesn't
11510 match their type's precision we use an element type of mode
11511 precision. The vectorization routines will have to make sure
11512 they support the proper result truncation/extension.
11513 We also make sure to build vector types with INTEGER_TYPE
11514 component type only. */
11515 if (INTEGRAL_TYPE_P (scalar_type)
11516 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11517 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11518 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11519 TYPE_UNSIGNED (scalar_type));
11520
11521 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11522 When the component mode passes the above test, simply use a type
11523 corresponding to that mode. The theory is that any use that
11524 would cause problems with this will disable vectorization anyway. */
11525 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11526 && !INTEGRAL_TYPE_P (scalar_type))
11527 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11528
11529 /* We can't build a vector type of elements with alignment bigger than
11530 their size. */
11531 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11532 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11533 TYPE_UNSIGNED (scalar_type));
11534
11535 /* If we fell back to using the mode, fail if there was
11536 no scalar type for it. */
11537 if (scalar_type == NULL_TREE)
11538 return NULL_TREE;
11539
11540 /* If no prevailing mode was supplied, use the mode the target prefers.
11541 Otherwise look up a vector mode based on the prevailing mode. */
11542 if (prevailing_mode == VOIDmode)
11543 {
11544 gcc_assert (known_eq (nunits, 0U));
11545 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11546 if (SCALAR_INT_MODE_P (simd_mode))
11547 {
11548 /* Traditional behavior is not to take the integer mode
11549 literally, but simply to use it as a way of determining
11550 the vector size. It is up to mode_for_vector to decide
11551 what the TYPE_MODE should be.
11552
11553 Note that nunits == 1 is allowed in order to support single
11554 element vector types. */
11555 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11556 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11557 return NULL_TREE;
11558 }
11559 }
11560 else if (SCALAR_INT_MODE_P (prevailing_mode)
11561 || !related_vector_mode (prevailing_mode,
11562 inner_mode, nunits).exists (&simd_mode))
11563 {
11564 /* Fall back to using mode_for_vector, mostly in the hope of being
11565 able to use an integer mode. */
11566 if (known_eq (nunits, 0U)
11567 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11568 return NULL_TREE;
11569
11570 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11571 return NULL_TREE;
11572 }
11573
11574 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11575
11576 /* In cases where the mode was chosen by mode_for_vector, check that
11577 the target actually supports the chosen mode, or that it at least
11578 allows the vector mode to be replaced by a like-sized integer. */
11579 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11580 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11581 return NULL_TREE;
11582
11583 /* Re-attach the address-space qualifier if we canonicalized the scalar
11584 type. */
11585 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11586 return build_qualified_type
11587 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11588
11589 return vectype;
11590 }
11591
11592 /* Function get_vectype_for_scalar_type.
11593
11594 Returns the vector type corresponding to SCALAR_TYPE as supported
11595 by the target. If GROUP_SIZE is nonzero and we're performing BB
11596 vectorization, make sure that the number of elements in the vector
11597 is no bigger than GROUP_SIZE. */
11598
11599 tree
11600 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11601 unsigned int group_size)
11602 {
11603 /* For BB vectorization, we should always have a group size once we've
11604 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11605 are tentative requests during things like early data reference
11606 analysis and pattern recognition. */
11607 if (is_a <bb_vec_info> (vinfo))
11608 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11609 else
11610 group_size = 0;
11611
11612 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11613 scalar_type);
11614 if (vectype && vinfo->vector_mode == VOIDmode)
11615 vinfo->vector_mode = TYPE_MODE (vectype);
11616
11617 /* Register the natural choice of vector type, before the group size
11618 has been applied. */
11619 if (vectype)
11620 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11621
11622 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11623 try again with an explicit number of elements. */
11624 if (vectype
11625 && group_size
11626 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11627 {
11628 /* Start with the biggest number of units that fits within
11629 GROUP_SIZE and halve it until we find a valid vector type.
11630 Usually either the first attempt will succeed or all will
11631 fail (in the latter case because GROUP_SIZE is too small
11632 for the target), but it's possible that a target could have
11633 a hole between supported vector types.
11634
11635 If GROUP_SIZE is not a power of 2, this has the effect of
11636 trying the largest power of 2 that fits within the group,
11637 even though the group is not a multiple of that vector size.
11638 The BB vectorizer will then try to carve up the group into
11639 smaller pieces. */
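/* For example, with GROUP_SIZE 6 this tries 4 units first and
then 2.  */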
11640 unsigned int nunits = 1 << floor_log2 (group_size);
11641 do
11642 {
11643 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11644 scalar_type, nunits);
11645 nunits /= 2;
11646 }
11647 while (nunits > 1 && !vectype);
11648 }
11649
11650 return vectype;
11651 }
11652
11653 /* Return the vector type corresponding to SCALAR_TYPE as supported
11654 by the target. NODE, if nonnull, is the SLP tree node that will
11655 use the returned vector type. */
11656
11657 tree
11658 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11659 {
11660 unsigned int group_size = 0;
11661 if (node)
11662 group_size = SLP_TREE_LANES (node);
11663 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11664 }
11665
11666 /* Function get_mask_type_for_scalar_type.
11667
11668 Returns the mask type corresponding to a result of comparison
11669 of vectors of specified SCALAR_TYPE as supported by target.
11670 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11671 make sure that the number of elements in the vector is no bigger
11672 than GROUP_SIZE. */
11673
11674 tree
11675 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11676 unsigned int group_size)
11677 {
11678 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11679
11680 if (!vectype)
11681 return NULL;
11682
11683 return truth_type_for (vectype);
11684 }
11685
11686 /* Function get_same_sized_vectype
11687
11688 Returns a vector type with elements of SCALAR_TYPE and the same
11689 overall size as VECTOR_TYPE, if supported by the target. */
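/* For example, given a 4 x float VECTOR_TYPE and a 32-bit integer
SCALAR_TYPE, this would typically return a 4 x int vector type.  */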
11690
11691 tree
11692 get_same_sized_vectype (tree scalar_type, tree vector_type)
11693 {
11694 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11695 return truth_type_for (vector_type);
11696
11697 poly_uint64 nunits;
11698 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11699 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11700 return NULL_TREE;
11701
11702 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11703 scalar_type, nunits);
11704 }
11705
11706 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11707 would not change the chosen vector modes. */
11708
11709 bool
11710 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11711 {
11712 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11713 i != vinfo->used_vector_modes.end (); ++i)
11714 if (!VECTOR_MODE_P (*i)
11715 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11716 return false;
11717 return true;
11718 }
11719
11720 /* Function vect_is_simple_use.
11721
11722 Input:
11723 VINFO - the vect info of the loop or basic block that is being vectorized.
11724 OPERAND - operand in the loop or bb.
11725 Output:
11726 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11727 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11728 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11729 the definition could be anywhere in the function
11730 DT - the type of definition
11731
11732 Returns whether a stmt with OPERAND can be vectorized.
11733 For loops, supportable operands are constants, loop invariants, and operands
11734 that are defined by the current iteration of the loop. Unsupportable
11735 operands are those that are defined by a previous iteration of the loop (as
11736 is the case in reduction/induction computations).
11737 For basic blocks, supportable operands are constants and bb invariants.
11738 For now, operands defined outside the basic block are not supported. */
11739
11740 bool
11741 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11742 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11743 {
11744 if (def_stmt_info_out)
11745 *def_stmt_info_out = NULL;
11746 if (def_stmt_out)
11747 *def_stmt_out = NULL;
11748 *dt = vect_unknown_def_type;
11749
11750 if (dump_enabled_p ())
11751 {
11752 dump_printf_loc (MSG_NOTE, vect_location,
11753 "vect_is_simple_use: operand ");
11754 if (TREE_CODE (operand) == SSA_NAME
11755 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11756 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11757 else
11758 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11759 }
11760
11761 if (CONSTANT_CLASS_P (operand))
11762 *dt = vect_constant_def;
11763 else if (is_gimple_min_invariant (operand))
11764 *dt = vect_external_def;
11765 else if (TREE_CODE (operand) != SSA_NAME)
11766 *dt = vect_unknown_def_type;
11767 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11768 *dt = vect_external_def;
11769 else
11770 {
11771 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11772 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11773 if (!stmt_vinfo)
11774 *dt = vect_external_def;
11775 else
11776 {
11777 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11778 def_stmt = stmt_vinfo->stmt;
11779 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11780 if (def_stmt_info_out)
11781 *def_stmt_info_out = stmt_vinfo;
11782 }
11783 if (def_stmt_out)
11784 *def_stmt_out = def_stmt;
11785 }
11786
11787 if (dump_enabled_p ())
11788 {
11789 dump_printf (MSG_NOTE, ", type of def: ");
11790 switch (*dt)
11791 {
11792 case vect_uninitialized_def:
11793 dump_printf (MSG_NOTE, "uninitialized\n");
11794 break;
11795 case vect_constant_def:
11796 dump_printf (MSG_NOTE, "constant\n");
11797 break;
11798 case vect_external_def:
11799 dump_printf (MSG_NOTE, "external\n");
11800 break;
11801 case vect_internal_def:
11802 dump_printf (MSG_NOTE, "internal\n");
11803 break;
11804 case vect_induction_def:
11805 dump_printf (MSG_NOTE, "induction\n");
11806 break;
11807 case vect_reduction_def:
11808 dump_printf (MSG_NOTE, "reduction\n");
11809 break;
11810 case vect_double_reduction_def:
11811 dump_printf (MSG_NOTE, "double reduction\n");
11812 break;
11813 case vect_nested_cycle:
11814 dump_printf (MSG_NOTE, "nested cycle\n");
11815 break;
11816 case vect_first_order_recurrence:
11817 dump_printf (MSG_NOTE, "first order recurrence\n");
11818 break;
11819 case vect_unknown_def_type:
11820 dump_printf (MSG_NOTE, "unknown\n");
11821 break;
11822 }
11823 }
11824
11825 if (*dt == vect_unknown_def_type)
11826 {
11827 if (dump_enabled_p ())
11828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11829 "Unsupported pattern.\n");
11830 return false;
11831 }
11832
11833 return true;
11834 }
11835
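/* A minimal sketch of the calling convention above (editorial illustration;
   RHS1 and VINFO are assumed to be in scope in the hypothetical caller):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     if (!vect_is_simple_use (rhs1, vinfo, &dt, &def_info))
       return false;

   On success DT distinguishes invariants (vect_constant_def,
   vect_external_def), which are typically broadcast, from defs produced
   inside the vectorized region (vect_internal_def and the cycle kinds),
   for which DEF_INFO points at the defining statement's stmt_vec_info.  */
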
11836 /* Function vect_is_simple_use.
11837
11838 Same as vect_is_simple_use but also determines the vector operand
11839 type of OPERAND and stores it to *VECTYPE. If the definition of
11840 OPERAND is vect_uninitialized_def, vect_constant_def or
11841 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11842 is responsible for computing the best-suited vector type for the
11843 scalar operand. */
11844
11845 bool
11846 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11847 tree *vectype, stmt_vec_info *def_stmt_info_out,
11848 gimple **def_stmt_out)
11849 {
11850 stmt_vec_info def_stmt_info;
11851 gimple *def_stmt;
11852 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11853 return false;
11854
11855 if (def_stmt_out)
11856 *def_stmt_out = def_stmt;
11857 if (def_stmt_info_out)
11858 *def_stmt_info_out = def_stmt_info;
11859
11860 /* Now get a vector type if the def is internal, otherwise supply
11861 NULL_TREE and leave it up to the caller to figure out a proper
11862 type for the use stmt. */
11863 if (*dt == vect_internal_def
11864 || *dt == vect_induction_def
11865 || *dt == vect_reduction_def
11866 || *dt == vect_double_reduction_def
11867 || *dt == vect_nested_cycle
11868 || *dt == vect_first_order_recurrence)
11869 {
11870 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11871 gcc_assert (*vectype != NULL_TREE);
11872 if (dump_enabled_p ())
11873 dump_printf_loc (MSG_NOTE, vect_location,
11874 "vect_is_simple_use: vectype %T\n", *vectype);
11875 }
11876 else if (*dt == vect_uninitialized_def
11877 || *dt == vect_constant_def
11878 || *dt == vect_external_def)
11879 *vectype = NULL_TREE;
11880 else
11881 gcc_unreachable ();
11882
11883 return true;
11884 }
11885
11886 /* Function vect_is_simple_use.
11887
11888 Same as vect_is_simple_use but determines the operand at position
11889 OPERAND of either STMT or SLP_NODE, filling in *OP
11890 and *SLP_DEF (when SLP_NODE is not NULL). */
11891
11892 bool
11893 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11894 unsigned operand, tree *op, slp_tree *slp_def,
11895 enum vect_def_type *dt,
11896 tree *vectype, stmt_vec_info *def_stmt_info_out)
11897 {
11898 if (slp_node)
11899 {
11900 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11901 *slp_def = child;
11902 *vectype = SLP_TREE_VECTYPE (child);
11903 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11904 {
11905 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11906 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11907 }
11908 else
11909 {
11910 if (def_stmt_info_out)
11911 *def_stmt_info_out = NULL;
11912 *op = SLP_TREE_SCALAR_OPS (child)[0];
11913 *dt = SLP_TREE_DEF_TYPE (child);
11914 return true;
11915 }
11916 }
11917 else
11918 {
11919 *slp_def = NULL;
11920 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11921 {
11922 if (gimple_assign_rhs_code (ass) == COND_EXPR
11923 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11924 {
11925 if (operand < 2)
11926 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11927 else
11928 *op = gimple_op (ass, operand);
11929 }
11930 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11931 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11932 else
11933 *op = gimple_op (ass, operand + 1);
11934 }
11935 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11936 *op = gimple_call_arg (call, operand);
11937 else
11938 gcc_unreachable ();
11939 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11940 }
11941 }
11942
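/* A worked example of the operand numbering used above (editorial sketch,
   for the non-SLP gassign case):  given

     x_1 = a_2 < b_3 ? c_4 : d_5;

   OPERAND 0 and 1 select the comparison operands a_2 and b_3 (pulled out
   of the COND_EXPR's rhs1), while OPERAND 2 and 3 select c_4 and d_5 via
   gimple_op (ass, 2) and gimple_op (ass, 3).  For a plain assignment such
   as x_1 = a_2 + b_3, OPERAND N simply maps to gimple_op (ass, N + 1).  */
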
11943 /* If OP is not NULL and is external or constant, update its vector
11944 type with VECTYPE. Returns true if successful or false if not,
11945 for example when conflicting vector types are present. */
11946
11947 bool
11948 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11949 {
11950 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11951 return true;
11952 if (SLP_TREE_VECTYPE (op))
11953 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11954 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P; those
11955 should be handled by patterns. Allow vect_constant_def for now. */
11956 if (VECTOR_BOOLEAN_TYPE_P (vectype)
11957 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
11958 return false;
11959 SLP_TREE_VECTYPE (op) = vectype;
11960 return true;
11961 }
11962
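/* A typical analysis-time use, sketched under the assumption that the
   surrounding vectorizable_* variables (vinfo, stmt_info, slp_node and the
   chosen vectype) exist in the hypothetical caller:

     slp_tree slp_op;
     tree op, op_vectype;
     enum vect_def_type dt;
     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
                              &op, &slp_op, &dt, &op_vectype)
         || !vect_maybe_update_slp_op_vectype (slp_op, vectype))
       return false;

   so that external and constant SLP operands shared by several users end
   up with one agreed-upon vector type, or analysis fails cleanly when the
   requested types conflict.  */
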
11963 /* Function supportable_widening_operation
11964
11965 Check whether an operation represented by the code CODE is a
11966 widening operation that is supported by the target platform in
11967 vector form (i.e., when operating on arguments of type VECTYPE_IN
11968 producing a result of type VECTYPE_OUT).
11969
11970 Widening operations we currently support are NOP (CONVERT), FLOAT, FIX_TRUNC,
11971 WIDEN_MULT, WIDEN_PLUS, WIDEN_MINUS, WIDEN_LSHIFT, DOT_PROD and SAD.
11972 This function checks if these operations are supported by the target
11973 platform either directly (via vector tree-codes), or via target builtins.
11974
11975 Output:
11976 - CODE1 and CODE2 are codes of vector operations to be used when
11977 vectorizing the operation, if available.
11978 - MULTI_STEP_CVT determines the number of required intermediate steps in
11979 case of multi-step conversion (like char->short->int - in that case
11980 MULTI_STEP_CVT will be 1).
11981 - INTERM_TYPES contains the intermediate type required to perform the
11982 widening operation (short in the above example). */
11983
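/* For instance (an illustrative sketch assuming 128-bit vectors and a
   target providing the standard unpack optabs), widening a V16QI input
   all the way to V4SI results is done in two steps:

     V16QI -> V8HI    via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR
     V8HI  -> V4SI    via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR

   in which case CODE1/CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the single intermediate
   vector type (the V8HI one).  */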
11984 bool
11985 supportable_widening_operation (vec_info *vinfo,
11986 enum tree_code code, stmt_vec_info stmt_info,
11987 tree vectype_out, tree vectype_in,
11988 enum tree_code *code1, enum tree_code *code2,
11989 int *multi_step_cvt,
11990 vec<tree> *interm_types)
11991 {
11992 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11993 class loop *vect_loop = NULL;
11994 machine_mode vec_mode;
11995 enum insn_code icode1, icode2;
11996 optab optab1, optab2;
11997 tree vectype = vectype_in;
11998 tree wide_vectype = vectype_out;
11999 enum tree_code c1, c2;
12000 int i;
12001 tree prev_type, intermediate_type;
12002 machine_mode intermediate_mode, prev_mode;
12003 optab optab3, optab4;
12004
12005 *multi_step_cvt = 0;
12006 if (loop_info)
12007 vect_loop = LOOP_VINFO_LOOP (loop_info);
12008
12009 switch (code)
12010 {
12011 case WIDEN_MULT_EXPR:
12012 /* The result of a vectorized widening operation usually requires
12013 two vectors (because the widened results do not fit into one vector).
12014 The generated vector results would normally be expected to appear
12015 in the same order as in the original scalar computation,
12016 i.e. if 8 results are generated in each vector iteration, they are
12017 to be organized as follows:
12018 vect1: [res1,res2,res3,res4],
12019 vect2: [res5,res6,res7,res8].
12020
12021 However, in the special case that the result of the widening
12022 operation is used in a reduction computation only, the order doesn't
12023 matter (because when vectorizing a reduction we change the order of
12024 the computation). Some targets can take advantage of this and
12025 generate more efficient code. For example, targets like Altivec,
12026 that support widen_mult using a sequence of {mult_even,mult_odd}
12027 generate the following vectors:
12028 vect1: [res1,res3,res5,res7],
12029 vect2: [res2,res4,res6,res8].
12030
12031 When vectorizing outer-loops, we execute the inner-loop sequentially
12032 (each vectorized inner-loop iteration contributes to VF outer-loop
12033 iterations in parallel). We therefore don't allow changing the
12034 order of the computation in the inner-loop during outer-loop
12035 vectorization. */
12036 /* TODO: Another case in which order doesn't *really* matter is when we
12037 widen and then contract again, e.g. (short)((int)x * y >> 8).
12038 Normally, pack_trunc performs an even/odd permute, whereas the
12039 repack from an even/odd expansion would be an interleave, which
12040 would be significantly simpler for e.g. AVX2. */
12041 /* In any case, in order to avoid duplicating the code below, recurse
12042 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12043 are properly set up for the caller. If we fail, we'll continue with
12044 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12045 if (vect_loop
12046 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12047 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12048 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12049 stmt_info, vectype_out,
12050 vectype_in, code1, code2,
12051 multi_step_cvt, interm_types))
12052 {
12053 /* Elements in a vector with the vect_used_by_reduction property cannot
12054 be reordered if the use chain with this property does not have the
12055 same operation. One such example is s += a * b, where elements
12056 in a and b cannot be reordered. Here we check if the vector defined
12057 by STMT is only directly used in the reduction statement. */
12058 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12059 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12060 if (use_stmt_info
12061 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12062 return true;
12063 }
12064 c1 = VEC_WIDEN_MULT_LO_EXPR;
12065 c2 = VEC_WIDEN_MULT_HI_EXPR;
12066 break;
12067
12068 case DOT_PROD_EXPR:
12069 c1 = DOT_PROD_EXPR;
12070 c2 = DOT_PROD_EXPR;
12071 break;
12072
12073 case SAD_EXPR:
12074 c1 = SAD_EXPR;
12075 c2 = SAD_EXPR;
12076 break;
12077
12078 case VEC_WIDEN_MULT_EVEN_EXPR:
12079 /* Support the recursion induced just above. */
12080 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12081 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12082 break;
12083
12084 case WIDEN_LSHIFT_EXPR:
12085 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12086 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12087 break;
12088
12089 case WIDEN_PLUS_EXPR:
12090 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12091 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12092 break;
12093
12094 case WIDEN_MINUS_EXPR:
12095 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12096 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12097 break;
12098
12099 CASE_CONVERT:
12100 c1 = VEC_UNPACK_LO_EXPR;
12101 c2 = VEC_UNPACK_HI_EXPR;
12102 break;
12103
12104 case FLOAT_EXPR:
12105 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12106 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12107 break;
12108
12109 case FIX_TRUNC_EXPR:
12110 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12111 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12112 break;
12113
12114 default:
12115 gcc_unreachable ();
12116 }
12117
12118 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12119 std::swap (c1, c2);
12120
12121 if (code == FIX_TRUNC_EXPR)
12122 {
12123 /* The signedness is determined from the output operand. */
12124 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12125 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12126 }
12127 else if (CONVERT_EXPR_CODE_P (code)
12128 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12129 && VECTOR_BOOLEAN_TYPE_P (vectype)
12130 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12131 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12132 {
12133 /* If the input and result modes are the same, a different optab
12134 is needed where we pass in the number of units in vectype. */
12135 optab1 = vec_unpacks_sbool_lo_optab;
12136 optab2 = vec_unpacks_sbool_hi_optab;
12137 }
12138 else
12139 {
12140 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12141 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12142 }
12143
12144 if (!optab1 || !optab2)
12145 return false;
12146
12147 vec_mode = TYPE_MODE (vectype);
12148 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12149 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12150 return false;
12151
12152 *code1 = c1;
12153 *code2 = c2;
12154
12155 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12156 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12157 {
12158 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12159 return true;
12160 /* For scalar masks we may have different boolean
12161 vector types having the same QImode. Thus we add
12162 an additional check on the number of elements. */
12163 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12164 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12165 return true;
12166 }
12167
12168 /* Check if it's a multi-step conversion that can be done using intermediate
12169 types. */
12170
12171 prev_type = vectype;
12172 prev_mode = vec_mode;
12173
12174 if (!CONVERT_EXPR_CODE_P (code))
12175 return false;
12176
12177 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12178 intermediate steps in the promotion sequence. We try
12179 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12180 not. */
12181 interm_types->create (MAX_INTERM_CVT_STEPS);
12182 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12183 {
12184 intermediate_mode = insn_data[icode1].operand[0].mode;
12185 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12186 intermediate_type
12187 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12188 else
12189 intermediate_type
12190 = lang_hooks.types.type_for_mode (intermediate_mode,
12191 TYPE_UNSIGNED (prev_type));
12192
12193 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12194 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12195 && intermediate_mode == prev_mode
12196 && SCALAR_INT_MODE_P (prev_mode))
12197 {
12198 /* If the input and result modes are the same, a different optab
12199 is needed where we pass in the number of units in vectype. */
12200 optab3 = vec_unpacks_sbool_lo_optab;
12201 optab4 = vec_unpacks_sbool_hi_optab;
12202 }
12203 else
12204 {
12205 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12206 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12207 }
12208
12209 if (!optab3 || !optab4
12210 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12211 || insn_data[icode1].operand[0].mode != intermediate_mode
12212 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12213 || insn_data[icode2].operand[0].mode != intermediate_mode
12214 || ((icode1 = optab_handler (optab3, intermediate_mode))
12215 == CODE_FOR_nothing)
12216 || ((icode2 = optab_handler (optab4, intermediate_mode))
12217 == CODE_FOR_nothing))
12218 break;
12219
12220 interm_types->quick_push (intermediate_type);
12221 (*multi_step_cvt)++;
12222
12223 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12224 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12225 {
12226 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12227 return true;
12228 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12229 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12230 return true;
12231 }
12232
12233 prev_type = intermediate_type;
12234 prev_mode = intermediate_mode;
12235 }
12236
12237 interm_types->release ();
12238 return false;
12239 }
12240
12241
12242 /* Function supportable_narrowing_operation
12243
12244 Check whether an operation represented by the code CODE is a
12245 narrowing operation that is supported by the target platform in
12246 vector form (i.e., when operating on arguments of type VECTYPE_IN
12247 and producing a result of type VECTYPE_OUT).
12248
12249 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12250 and FLOAT. This function checks if these operations are supported by
12251 the target platform directly via vector tree-codes.
12252
12253 Output:
12254 - CODE1 is the code of a vector operation to be used when
12255 vectorizing the operation, if available.
12256 - MULTI_STEP_CVT determines the number of required intermediate steps in
12257 case of multi-step conversion (like int->short->char - in that case
12258 MULTI_STEP_CVT will be 1).
12259 - INTERM_TYPES contains the intermediate type required to perform the
12260 narrowing operation (short in the above example). */
12261
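/* For instance (an illustrative sketch assuming 128-bit vectors and a
   target providing the standard pack optabs), narrowing V4SI inputs down
   to V16QI results is done in two steps:

     V4SI -> V8HI     via VEC_PACK_TRUNC_EXPR
     V8HI -> V16QI    via VEC_PACK_TRUNC_EXPR

   in which case CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and
   INTERM_TYPES holds the single intermediate vector type (the V8HI one).  */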
12262 bool
12263 supportable_narrowing_operation (enum tree_code code,
12264 tree vectype_out, tree vectype_in,
12265 enum tree_code *code1, int *multi_step_cvt,
12266 vec<tree> *interm_types)
12267 {
12268 machine_mode vec_mode;
12269 enum insn_code icode1;
12270 optab optab1, interm_optab;
12271 tree vectype = vectype_in;
12272 tree narrow_vectype = vectype_out;
12273 enum tree_code c1;
12274 tree intermediate_type, prev_type;
12275 machine_mode intermediate_mode, prev_mode;
12276 int i;
12277 unsigned HOST_WIDE_INT n_elts;
12278 bool uns;
12279
12280 *multi_step_cvt = 0;
12281 switch (code)
12282 {
12283 CASE_CONVERT:
12284 c1 = VEC_PACK_TRUNC_EXPR;
12285 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12286 && VECTOR_BOOLEAN_TYPE_P (vectype)
12287 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12288 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12289 && n_elts < BITS_PER_UNIT)
12290 optab1 = vec_pack_sbool_trunc_optab;
12291 else
12292 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12293 break;
12294
12295 case FIX_TRUNC_EXPR:
12296 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12297 /* The signedness is determined from the output operand. */
12298 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12299 break;
12300
12301 case FLOAT_EXPR:
12302 c1 = VEC_PACK_FLOAT_EXPR;
12303 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12304 break;
12305
12306 default:
12307 gcc_unreachable ();
12308 }
12309
12310 if (!optab1)
12311 return false;
12312
12313 vec_mode = TYPE_MODE (vectype);
12314 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12315 return false;
12316
12317 *code1 = c1;
12318
12319 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12320 {
12321 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12322 return true;
12323 /* For scalar masks we may have different boolean
12324 vector types having the same QImode. Thus we add
12325 an additional check on the number of elements. */
12326 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12327 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12328 return true;
12329 }
12330
12331 if (code == FLOAT_EXPR)
12332 return false;
12333
12334 /* Check if it's a multi-step conversion that can be done using intermediate
12335 types. */
12336 prev_mode = vec_mode;
12337 prev_type = vectype;
12338 if (code == FIX_TRUNC_EXPR)
12339 uns = TYPE_UNSIGNED (vectype_out);
12340 else
12341 uns = TYPE_UNSIGNED (vectype);
12342
12343 /* For a multi-step FIX_TRUNC_EXPR, prefer a signed floating-to-integer
12344 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
12345 more costly than signed. */
12346 if (code == FIX_TRUNC_EXPR && uns)
12347 {
12348 enum insn_code icode2;
12349
12350 intermediate_type
12351 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12352 interm_optab
12353 = optab_for_tree_code (c1, intermediate_type, optab_default);
12354 if (interm_optab != unknown_optab
12355 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12356 && insn_data[icode1].operand[0].mode
12357 == insn_data[icode2].operand[0].mode)
12358 {
12359 uns = false;
12360 optab1 = interm_optab;
12361 icode1 = icode2;
12362 }
12363 }
12364
12365 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12366 intermediate steps in the narrowing sequence. We try
12367 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12368 interm_types->create (MAX_INTERM_CVT_STEPS);
12369 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12370 {
12371 intermediate_mode = insn_data[icode1].operand[0].mode;
12372 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12373 intermediate_type
12374 = vect_double_mask_nunits (prev_type, intermediate_mode);
12375 else
12376 intermediate_type
12377 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12378 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12379 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12380 && SCALAR_INT_MODE_P (prev_mode)
12381 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12382 && n_elts < BITS_PER_UNIT)
12383 interm_optab = vec_pack_sbool_trunc_optab;
12384 else
12385 interm_optab
12386 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12387 optab_default);
12388 if (!interm_optab
12389 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12390 || insn_data[icode1].operand[0].mode != intermediate_mode
12391 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12392 == CODE_FOR_nothing))
12393 break;
12394
12395 interm_types->quick_push (intermediate_type);
12396 (*multi_step_cvt)++;
12397
12398 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12399 {
12400 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12401 return true;
12402 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12403 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12404 return true;
12405 }
12406
12407 prev_mode = intermediate_mode;
12408 prev_type = intermediate_type;
12409 optab1 = interm_optab;
12410 }
12411
12412 interm_types->release ();
12413 return false;
12414 }
12415
12416 /* Generate and return a vector mask of MASK_TYPE such that
12417 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12418 Add the statements to SEQ. */
12419
12420 tree
12421 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12422 tree end_index, const char *name)
12423 {
12424 tree cmp_type = TREE_TYPE (start_index);
12425 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12426 cmp_type, mask_type,
12427 OPTIMIZE_FOR_SPEED));
12428 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12429 start_index, end_index,
12430 build_zero_cst (mask_type));
12431 tree tmp;
12432 if (name)
12433 tmp = make_temp_ssa_name (mask_type, NULL, name);
12434 else
12435 tmp = make_ssa_name (mask_type);
12436 gimple_call_set_lhs (call, tmp);
12437 gimple_seq_add_stmt (seq, call);
12438 return tmp;
12439 }
12440
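/* For example (editorial sketch), with an 8-lane MASK_TYPE,
   START_INDEX == 6 and END_INDEX == 10, the IFN_WHILE_ULT call built
   above produces the mask

     { 1, 1, 1, 1, 0, 0, 0, 0 }

   i.e. lane I is active exactly while 6 + I < 10.  */
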
12441 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12442 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12443
12444 tree
12445 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12446 tree end_index)
12447 {
12448 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12449 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12450 }
12451
12452 /* Try to compute the vector types required to vectorize STMT_INFO,
12453 returning true on success and false if vectorization isn't possible.
12454 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12455 make sure that the number of elements in the vectors is no bigger
12456 than GROUP_SIZE.
12457
12458 On success:
12459
12460 - Set *STMT_VECTYPE_OUT to:
12461 - NULL_TREE if the statement doesn't need to be vectorized;
12462 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12463
12464 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12465 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12466 statement does not help to determine the overall number of units. */
12467
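/* As an illustration (a sketch assuming 128-bit vectors): for the
   widening statement

     int_res_1 = (int) short_val_2;

   *STMT_VECTYPE_OUT is a 4-element int vector (derived from the lhs),
   whereas the smallest scalar type in the statement is short, so
   *NUNITS_VECTYPE_OUT is an 8-element short vector.  The statement
   therefore forces the vectorization factor to a multiple of 8 and is
   implemented with two int vectors per vector iteration.  */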
12468 opt_result
12469 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12470 tree *stmt_vectype_out,
12471 tree *nunits_vectype_out,
12472 unsigned int group_size)
12473 {
12474 gimple *stmt = stmt_info->stmt;
12475
12476 /* For BB vectorization, we should always have a group size once we've
12477 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12478 are tentative requests during things like early data reference
12479 analysis and pattern recognition. */
12480 if (is_a <bb_vec_info> (vinfo))
12481 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12482 else
12483 group_size = 0;
12484
12485 *stmt_vectype_out = NULL_TREE;
12486 *nunits_vectype_out = NULL_TREE;
12487
12488 if (gimple_get_lhs (stmt) == NULL_TREE
12489 /* MASK_STORE has no lhs, but is ok. */
12490 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12491 {
12492 if (is_a <gcall *> (stmt))
12493 {
12494 /* Ignore calls with no lhs. These must be calls to
12495 #pragma omp simd functions, and the vectorization factor
12496 they really need can't be determined until
12497 vectorizable_simd_clone_call. */
12498 if (dump_enabled_p ())
12499 dump_printf_loc (MSG_NOTE, vect_location,
12500 "defer to SIMD clone analysis.\n");
12501 return opt_result::success ();
12502 }
12503
12504 return opt_result::failure_at (stmt,
12505 "not vectorized: irregular stmt.%G", stmt);
12506 }
12507
12508 tree vectype;
12509 tree scalar_type = NULL_TREE;
12510 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12511 {
12512 vectype = STMT_VINFO_VECTYPE (stmt_info);
12513 if (dump_enabled_p ())
12514 dump_printf_loc (MSG_NOTE, vect_location,
12515 "precomputed vectype: %T\n", vectype);
12516 }
12517 else if (vect_use_mask_type_p (stmt_info))
12518 {
12519 unsigned int precision = stmt_info->mask_precision;
12520 scalar_type = build_nonstandard_integer_type (precision, 1);
12521 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12522 if (!vectype)
12523 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12524 " data-type %T\n", scalar_type);
12525 if (dump_enabled_p ())
12526 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12527 }
12528 else
12529 {
12530 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12531 scalar_type = TREE_TYPE (DR_REF (dr));
12532 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12533 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12534 else
12535 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12536
12537 if (dump_enabled_p ())
12538 {
12539 if (group_size)
12540 dump_printf_loc (MSG_NOTE, vect_location,
12541 "get vectype for scalar type (group size %d):"
12542 " %T\n", group_size, scalar_type);
12543 else
12544 dump_printf_loc (MSG_NOTE, vect_location,
12545 "get vectype for scalar type: %T\n", scalar_type);
12546 }
12547 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12548 if (!vectype)
12549 return opt_result::failure_at (stmt,
12550 "not vectorized:"
12551 " unsupported data-type %T\n",
12552 scalar_type);
12553
12554 if (dump_enabled_p ())
12555 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12556 }
12557
12558 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12559 return opt_result::failure_at (stmt,
12560 "not vectorized: vector stmt in loop:%G",
12561 stmt);
12562
12563 *stmt_vectype_out = vectype;
12564
12565 /* Don't try to compute scalar types if the stmt produces a boolean
12566 vector; use the existing vector type instead. */
12567 tree nunits_vectype = vectype;
12568 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12569 {
12570 /* The number of units is set according to the smallest scalar
12571 type (or the largest vector size, but we only support one
12572 vector size per vectorization). */
12573 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12574 TREE_TYPE (vectype));
12575 if (scalar_type != TREE_TYPE (vectype))
12576 {
12577 if (dump_enabled_p ())
12578 dump_printf_loc (MSG_NOTE, vect_location,
12579 "get vectype for smallest scalar type: %T\n",
12580 scalar_type);
12581 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12582 group_size);
12583 if (!nunits_vectype)
12584 return opt_result::failure_at
12585 (stmt, "not vectorized: unsupported data-type %T\n",
12586 scalar_type);
12587 if (dump_enabled_p ())
12588 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12589 nunits_vectype);
12590 }
12591 }
12592
12593 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12594 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12595 return opt_result::failure_at (stmt,
12596 "Not vectorized: Incompatible number "
12597 "of vector subparts between %T and %T\n",
12598 nunits_vectype, *stmt_vectype_out);
12599
12600 if (dump_enabled_p ())
12601 {
12602 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12603 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12604 dump_printf (MSG_NOTE, "\n");
12605 }
12606
12607 *nunits_vectype_out = nunits_vectype;
12608 return opt_result::success ();
12609 }
12610
12611 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12612
12613 min_of_start_and_end = min (START_INDEX, END_INDEX);
12614 left_len = END_INDEX - min_of_start_and_end;
12615 rhs = min (left_len, LEN_LIMIT);
12616 LEN = rhs;
12617
12618 Note: the cost of the code generated by this function is modeled
12619 by vect_estimate_min_profitable_iters, so changes here may need
12620 corresponding changes there. */
12621
12622 gimple_seq
12623 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12624 {
12625 gimple_seq stmts = NULL;
12626 tree len_type = TREE_TYPE (len);
12627 gcc_assert (TREE_TYPE (start_index) == len_type);
12628
12629 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12630 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12631 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12632 gimple* stmt = gimple_build_assign (len, rhs);
12633 gimple_seq_add_stmt (&stmts, stmt);
12634
12635 return stmts;
12636 }
12637
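/* For example (editorial sketch), with LEN_LIMIT == 16 and END_INDEX == 59
   the sequence built above yields

     START_INDEX == 32  ->  LEN = MIN (59 - MIN (32, 59), 16) = 16
     START_INDEX == 48  ->  LEN = MIN (59 - MIN (48, 59), 16) = 11

   i.e. full-length vectors until the final, shorter, tail iteration.  */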