gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
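/* Illustrative use of record_stmt_cost above (a sketch, argument values
   invented): costing two copies of an unaligned load that belongs to a
   gather/scatter statement.  Because STMT_INFO has
   STMT_VINFO_GATHER_SCATTER_P set, the kind is canonicalized to
   vector_gather_load before the entry is pushed onto COST_VEC, and the
   returned estimate is the target's builtin cost for that (remapped)
   kind multiplied by the count:

     unsigned cost = record_stmt_cost (cost_vec, 2, unaligned_load,
				       stmt_info, vectype, 0, vect_body);  */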
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
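/* Illustrative sketch (names invented) of what read_vector_array and
   write_vector_array emit when vectorizing with load/store-lanes:

     vect__1 = vect_array[0];     <- read_vector_array, n == 0
     vect__2 = vect_array[1];     <- read_vector_array, n == 1
     ...
     vect_array2[0] = vect__3;    <- write_vector_array, n == 0
     vect_array2[1] = vect__4;    <- write_vector_array, n == 1

   The array variables themselves come from create_vector_array and are
   filled or consumed by the load/store-lanes operations elsewhere.  */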
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342               /* We expect all such uses to be in the loop exit phis
343                  (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
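/* For example (illustrative) of vect_stmt_relevant_p above:

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;     <- has a vdef: *relevant = vect_used_in_scope
	 s_3 = b[i] * 2;      <- s_3 used after the loop: *live_p = true
       }
     ... = s_3;               <- reaches here via the loop-exit PHI

   The store is relevant because it changes memory; the statement
   defining s_3 is live because its result is used outside the loop.  */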
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381    /* STMT has a data_ref.  FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
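/* For example (illustrative) of exist_non_indexing_operands_for_use_p
   above: given the statement a[i_1] = x_2, the use of i_1 only feeds
   the array index, so the function returns false for i_1 (its def need
   not be vectorized), whereas x_2 is the stored value and the function
   returns true for it.  */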
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450      we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
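/* Worked example (illustrative) for the nesting cases in process_use
   above: if an inner-loop statement D defines a value used by an
   outer-loop statement S (case 3b) and S is vect_used_in_scope, D is
   marked vect_used_in_outer; if S is used by a reduction or is only
   live, D is instead marked vect_used_in_outer_by_reduction, matching
   the second switch above.  */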
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602    Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 {
779 if (fatal)
780 *fatal = false;
781 return res;
782 }
783 }
784 } /* while worklist */
785
786 return opt_result::success ();
787 }
788
789 /* Function vect_model_simple_cost.
790
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
794
795 static void
796 vect_model_simple_cost (vec_info *,
797 stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt,
799 int ndts,
800 slp_tree node,
801 stmt_vector_for_cost *cost_vec,
802 vect_cost_for_stmt kind = vector_stmt)
803 {
804 int inside_cost = 0, prologue_cost = 0;
805
806 gcc_assert (cost_vec != NULL);
807
808 /* ??? Somehow we need to fix this at the callers. */
809 if (node)
810 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811
812 if (!node)
813     /* Cost the "broadcast" of a scalar operand into a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
815 cost model. */
816 for (int i = 0; i < ndts; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
819 stmt_info, 0, vect_prologue);
820
821 /* Adjust for two-operator SLP nodes. */
822 if (node && SLP_TREE_TWO_OPERATORS (node))
823 {
824 ncopies *= 2;
825 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
826 stmt_info, 0, vect_body);
827 }
828
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
831 stmt_info, 0, vect_body);
832
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost, prologue_cost);
837 }
838
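/* Illustrative example of the accounting in vect_model_simple_cost
   (non-SLP, values invented): a vector add with NCOPIES == 2 whose
   second operand is loop-invariant records 2 * vector_stmt in the loop
   body plus 1 * scalar_to_vec in the prologue for broadcasting the
   invariant operand.  */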
839
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
846
847 static void
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
849 enum vect_def_type *dt,
850 unsigned int ncopies, int pwr,
851 stmt_vector_for_cost *cost_vec)
852 {
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
855
856 for (i = 0; i < pwr + 1; i++)
857 {
858 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
859 stmt_info, 0, vect_body);
860 ncopies *= 2;
861 }
862
863   /* FORNOW: Assuming maximum 2 args per stmt.  */
864 for (i = 0; i < 2; i++)
865 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
866 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
867 stmt_info, 0, vect_prologue);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
874
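/* Worked example (illustrative) for vect_model_promotion_demotion_cost
   above: a two-step promotion (PWR == 1) with NCOPIES == 2 at the
   narrow end records 2 + 4 = 6 vec_promote_demote statements in the
   loop body, plus one vector_stmt in the prologue for each constant or
   invariant operand.  */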
875 /* Returns true if the current function returns DECL. */
876
877 static bool
878 cfun_returns (tree decl)
879 {
880 edge_iterator ei;
881 edge e;
882 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
883 {
884 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
885 if (!ret)
886 continue;
887 if (gimple_return_retval (ret) == decl)
888 return true;
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
891 though. */
892 gimple *def = ret;
893 do
894 {
895 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
896 }
897 while (gimple_clobber_p (def));
898 if (is_a <gassign *> (def)
899 && gimple_assign_lhs (def) == gimple_return_retval (ret)
900 && gimple_assign_rhs1 (def) == decl)
901 return true;
902 }
903 return false;
904 }
905
906 /* Function vect_model_store_cost
907
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
910
911 static void
912 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
913 vect_memory_access_type memory_access_type,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949         /* Uses high and low interleave or shuffle operations for each
950            needed permute.  */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
975
976 if (memory_access_type == VMAT_ELEMENTWISE
977 || memory_access_type == VMAT_STRIDED_SLP)
978 {
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
981 inside_cost += record_stmt_cost (cost_vec,
982 ncopies * assumed_nunits,
983 vec_to_scalar, stmt_info, 0, vect_body);
984 }
985
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
991 if (base
992 && (TREE_CODE (base) == RESULT_DECL
993 || (DECL_P (base) && cfun_returns (base)))
994 && !aggregate_value_p (base, cfun->decl))
995 {
996 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
998 if (REG_P (reg))
999 {
1000 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1003 if (nregs > 1)
1004 {
1005 /* Spill. */
1006 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1007 vector_store,
1008 stmt_info, 0, vect_epilogue);
1009 /* Loads. */
1010 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1011 scalar_load,
1012 stmt_info, 0, vect_epilogue);
1013 }
1014 }
1015 }
1016
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 }
1022
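/* Worked example (illustrative) for vect_model_store_cost above: a
   grouped store implemented by VMAT_CONTIGUOUS_PERMUTE with
   DR_GROUP_SIZE == 4 and NCOPIES == 1 records
   1 * ceil_log2 (4) * 4 = 8 vec_perm operations for the interleaving,
   plus the vector_store or unaligned_store costs added by
   vect_get_store_cost.  */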
1023
1024 /* Calculate cost of DR's memory access. */
1025 void
1026 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1027 unsigned int *inside_cost,
1028 stmt_vector_for_cost *body_cost_vec)
1029 {
1030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1033
1034 switch (alignment_support_scheme)
1035 {
1036 case dr_aligned:
1037 {
1038 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1039 vector_store, stmt_info, 0,
1040 vect_body);
1041
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE, vect_location,
1044 "vect_model_store_cost: aligned.\n");
1045 break;
1046 }
1047
1048 case dr_unaligned_supported:
1049 {
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1052 unaligned_store, stmt_info,
1053 DR_MISALIGNMENT (dr_info),
1054 vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_store_cost: unaligned supported by "
1058 "hardware.\n");
1059 break;
1060 }
1061
1062 case dr_unaligned_unsupported:
1063 {
1064 *inside_cost = VECT_MAX_COST;
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1068 "vect_model_store_cost: unsupported access.\n");
1069 break;
1070 }
1071
1072 default:
1073 gcc_unreachable ();
1074 }
1075 }
1076
1077
1078 /* Function vect_model_load_cost
1079
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1084
1085 static void
1086 vect_model_load_cost (vec_info *vinfo,
1087 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1088 vect_memory_access_type memory_access_type,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1091 {
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094
1095 gcc_assert (cost_vec);
1096
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 {
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms;
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1110 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1111 vf, true, &n_perms);
1112 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1113 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1117 bitmap_clear (perm);
1118 for (unsigned i = 0;
1119 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1120 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1121 ncopies = 0;
1122 bool load_seen = false;
1123 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1124 {
1125 if (i % assumed_nunits == 0)
1126 {
1127 if (load_seen)
1128 ncopies++;
1129 load_seen = false;
1130 }
1131 if (bitmap_bit_p (perm, i))
1132 load_seen = true;
1133 }
1134 if (load_seen)
1135 ncopies++;
1136 gcc_assert (ncopies
1137 <= (DR_GROUP_SIZE (first_stmt_info)
1138 - DR_GROUP_GAP (first_stmt_info)
1139 + assumed_nunits - 1) / assumed_nunits);
1140 }
1141
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info = stmt_info;
1145 if (!slp_node && grouped_access_p)
1146 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1147
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p = (first_stmt_info == stmt_info);
1152
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1159 {
1160       /* Uses even and odd extract operations or shuffle operations
1161          for each needed permute.  */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1171 }
1172
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1176 {
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1179 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1180 inside_cost += record_stmt_cost (cost_vec,
1181 ncopies * assumed_nunits,
1182 scalar_load, stmt_info, 0, vect_body);
1183 }
1184 else
1185 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1186 &inside_cost, &prologue_cost,
1187 cost_vec, cost_vec, true);
1188 if (memory_access_type == VMAT_ELEMENTWISE
1189 || memory_access_type == VMAT_STRIDED_SLP)
1190 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1191 stmt_info, 0, vect_body);
1192
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE, vect_location,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1197 }
1198
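/* Worked example (illustrative) for the SLP load-permutation handling
   in vect_model_load_cost above: with DR_GROUP_SIZE == 4, vectors of 2
   elements and a load permutation of {0, 0, 2, 2}, only group elements
   0 and 2 are needed; they fall into different vectors, so NCOPIES is
   reduced to 2 vector loads even though the permutation has 4
   entries.  */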
1199
1200 /* Calculate cost of DR's memory access. */
1201 void
1202 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1203 bool add_realign_cost, unsigned int *inside_cost,
1204 unsigned int *prologue_cost,
1205 stmt_vector_for_cost *prologue_cost_vec,
1206 stmt_vector_for_cost *body_cost_vec,
1207 bool record_prologue_costs)
1208 {
1209 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1212
1213 switch (alignment_support_scheme)
1214 {
1215 case dr_aligned:
1216 {
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: aligned.\n");
1223
1224 break;
1225 }
1226 case dr_unaligned_supported:
1227 {
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1230 unaligned_load, stmt_info,
1231 DR_MISALIGNMENT (dr_info),
1232 vect_body);
1233
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE, vect_location,
1236 "vect_model_load_cost: unaligned supported by "
1237 "hardware.\n");
1238
1239 break;
1240 }
1241 case dr_explicit_realign:
1242 {
1243 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1244 vector_load, stmt_info, 0, vect_body);
1245 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1246 vec_perm, stmt_info, 0, vect_body);
1247
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1250 prologue costs. */
1251 if (targetm.vectorize.builtin_mask_for_load)
1252 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1253 stmt_info, 0, vect_body);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign_optimized:
1262 {
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "vect_model_load_cost: unaligned software "
1266 "pipelined.\n");
1267
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1274
1275 if (add_realign_cost && record_prologue_costs)
1276 {
1277 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1278 vector_stmt, stmt_info,
1279 0, vect_prologue);
1280 if (targetm.vectorize.builtin_mask_for_load)
1281 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1282 vector_stmt, stmt_info,
1283 0, vect_prologue);
1284 }
1285
1286 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1287 stmt_info, 0, vect_body);
1288 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1289 stmt_info, 0, vect_body);
1290
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: explicit realign optimized"
1294 "\n");
1295
1296 break;
1297 }
1298
1299 case dr_unaligned_unsupported:
1300 {
1301 *inside_cost = VECT_MAX_COST;
1302
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1305 "vect_model_load_cost: unsupported access.\n");
1306 break;
1307 }
1308
1309 default:
1310 gcc_unreachable ();
1311 }
1312 }
1313
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1316
1317 static void
1318 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1319 gimple_stmt_iterator *gsi)
1320 {
1321 if (gsi)
1322 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1323 else
1324 {
1325 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1326
1327 if (loop_vinfo)
1328 {
1329 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1330 basic_block new_bb;
1331 edge pe;
1332
1333 if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
1334 loop = loop->inner;
1335
1336 pe = loop_preheader_edge (loop);
1337 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1338 gcc_assert (!new_bb);
1339 }
1340 else
1341 {
1342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1343 gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
1344 gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
1345 }
1346 }
1347
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "created new init_stmt: %G", new_stmt);
1351 }
1352
1353 /* Function vect_init_vector.
1354
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1362
1363 tree
1364 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1365 gimple_stmt_iterator *gsi)
1366 {
1367 gimple *init_stmt;
1368 tree new_temp;
1369
1370   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1371 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1372 {
1373 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1374 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1375 {
1376          /* A scalar boolean value should be transformed into an
1377             all-zeros or all-ones value before building a vector.  */
1378 if (VECTOR_BOOLEAN_TYPE_P (type))
1379 {
1380 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1381 tree false_val = build_zero_cst (TREE_TYPE (type));
1382
1383 if (CONSTANT_CLASS_P (val))
1384 val = integer_zerop (val) ? false_val : true_val;
1385 else
1386 {
1387 new_temp = make_ssa_name (TREE_TYPE (type));
1388 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1389 val, true_val, false_val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 val = new_temp;
1392 }
1393 }
1394 else
1395 {
1396 gimple_seq stmts = NULL;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1398 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1399 TREE_TYPE (type), val);
1400 else
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1404 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1405 !gsi_end_p (gsi2); )
1406 {
1407 init_stmt = gsi_stmt (gsi2);
1408 gsi_remove (&gsi2, false);
1409 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1410 }
1411 }
1412 }
1413 val = build_vector_from_val (type, val);
1414 }
1415
1416 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1417 init_stmt = gimple_build_assign (new_temp, val);
1418 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1419 return new_temp;
1420 }
1421
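/* For example (illustrative): calling vect_init_vector with a scalar
   integer constant 5, a 4-element integer vector type and GSI == NULL
   emits roughly

     cst_1 = { 5, 5, 5, 5 };

   on the loop preheader edge and returns the new SSA name (the exact
   name here is invented).  */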
1422
1423 /* Function vect_get_vec_defs_for_operand.
1424
1425 OP is an operand in STMT_VINFO. This function returns a vector of
1426 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1427
1428 In the case that OP is an SSA_NAME which is defined in the loop, then
1429 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1430
1431 In case OP is an invariant or constant, a new stmt that creates a vector def
1432 needs to be introduced. VECTYPE may be used to specify a required type for
1433 vector invariant. */
1434
1435 void
1436 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1437 unsigned ncopies,
1438 tree op, vec<tree> *vec_oprnds, tree vectype)
1439 {
1440 gimple *def_stmt;
1441 enum vect_def_type dt;
1442 bool is_simple_use;
1443 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1444
1445 if (dump_enabled_p ())
1446 dump_printf_loc (MSG_NOTE, vect_location,
1447 "vect_get_vec_defs_for_operand: %T\n", op);
1448
1449 stmt_vec_info def_stmt_info;
1450 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1451 &def_stmt_info, &def_stmt);
1452 gcc_assert (is_simple_use);
1453 if (def_stmt && dump_enabled_p ())
1454 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1455
1456 vec_oprnds->create (ncopies);
1457 if (dt == vect_constant_def || dt == vect_external_def)
1458 {
1459 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1460 tree vector_type;
1461
1462 if (vectype)
1463 vector_type = vectype;
1464 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1465 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1466 vector_type = truth_type_for (stmt_vectype);
1467 else
1468 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1469
1470 gcc_assert (vector_type);
1471 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1472 while (ncopies--)
1473 vec_oprnds->quick_push (vop);
1474 }
1475 else
1476 {
1477 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1478 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1479 for (unsigned i = 0; i < ncopies; ++i)
1480 vec_oprnds->quick_push (gimple_get_lhs
1481 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1482 }
1483 }
1484
1485
1486 /* Get vectorized definitions for OP0, OP1, OP2 and OP3 (any of which may be null).  */
1487
1488 void
1489 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1490 unsigned ncopies,
1491 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1492 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1493 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1494 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1495 {
1496 if (slp_node)
1497 {
1498 if (op0)
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1500 if (op1)
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1502 if (op2)
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1504 if (op3)
1505 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1506 }
1507 else
1508 {
1509 if (op0)
1510 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1511 op0, vec_oprnds0, vectype0);
1512 if (op1)
1513 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1514 op1, vec_oprnds1, vectype1);
1515 if (op2)
1516 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1517 op2, vec_oprnds2, vectype2);
1518 if (op3)
1519 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1520 op3, vec_oprnds3, vectype3);
1521 }
1522 }
1523
1524 void
1525 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1526 unsigned ncopies,
1527 tree op0, vec<tree> *vec_oprnds0,
1528 tree op1, vec<tree> *vec_oprnds1,
1529 tree op2, vec<tree> *vec_oprnds2,
1530 tree op3, vec<tree> *vec_oprnds3)
1531 {
1532 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1533 op0, vec_oprnds0, NULL_TREE,
1534 op1, vec_oprnds1, NULL_TREE,
1535 op2, vec_oprnds2, NULL_TREE,
1536 op3, vec_oprnds3, NULL_TREE);
1537 }
1538
1539 /* Helper function called by vect_finish_replace_stmt and
1540 vect_finish_stmt_generation. Set the location of the new
1541 statement and create and return a stmt_vec_info for it. */
1542
1543 static void
1544 vect_finish_stmt_generation_1 (vec_info *,
1545 stmt_vec_info stmt_info, gimple *vec_stmt)
1546 {
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1549
1550 if (stmt_info)
1551 {
1552 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1553
1554 /* While EH edges will generally prevent vectorization, stmt might
1555 e.g. be in a must-not-throw region. Ensure newly created stmts
1556 that could throw are part of the same region. */
1557 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1558 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1559 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1560 }
1561 else
1562 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1563 }
1564
1565 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1566 which sets the same scalar result as STMT_INFO did. Create and return a
1567 stmt_vec_info for VEC_STMT. */
1568
1569 void
1570 vect_finish_replace_stmt (vec_info *vinfo,
1571 stmt_vec_info stmt_info, gimple *vec_stmt)
1572 {
1573 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1574 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1575
1576 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1577 gsi_replace (&gsi, vec_stmt, true);
1578
1579 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1580 }
1581
1582 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1583 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1584
1585 void
1586 vect_finish_stmt_generation (vec_info *vinfo,
1587 stmt_vec_info stmt_info, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1589 {
1590 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1591
1592 if (!gsi_end_p (*gsi)
1593 && gimple_has_mem_ops (vec_stmt))
1594 {
1595 gimple *at_stmt = gsi_stmt (*gsi);
1596 tree vuse = gimple_vuse (at_stmt);
1597 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1598 {
1599 tree vdef = gimple_vdef (at_stmt);
1600 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1601 /* If we have an SSA vuse and insert a store, update virtual
1602 SSA form to avoid triggering the renamer. Do so only
1603 if we can easily see all uses - which is what almost always
1604 happens with the way vectorized stmts are inserted. */
1605 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1606 && ((is_gimple_assign (vec_stmt)
1607 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1608 || (is_gimple_call (vec_stmt)
1609 && !(gimple_call_flags (vec_stmt)
1610 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1611 {
1612 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1613 gimple_set_vdef (vec_stmt, new_vdef);
1614 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1615 }
1616 }
1617 }
1618 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1619 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1620 }
1621
1622 /* We want to vectorize a call to combined function CFN with function
1623 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1624 as the types of all inputs. Check whether this is possible using
1625 an internal function, returning its code if so or IFN_LAST if not. */
1626
1627 static internal_fn
1628 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1629 tree vectype_out, tree vectype_in)
1630 {
1631 internal_fn ifn;
1632 if (internal_fn_p (cfn))
1633 ifn = as_internal_fn (cfn);
1634 else
1635 ifn = associated_internal_fn (fndecl);
1636 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1637 {
1638 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1639 if (info.vectorizable)
1640 {
1641 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1642 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1643 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1644 OPTIMIZE_FOR_SPEED))
1645 return ifn;
1646 }
1647 }
1648 return IFN_LAST;
1649 }
1650
1651
1652 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1653 gimple_stmt_iterator *);
1654
1655 /* Check whether a load or store statement in the loop described by
1656 LOOP_VINFO is possible in a fully-masked loop. This is testing
1657 whether the vectorizer pass has the appropriate support, as well as
1658 whether the target does.
1659
1660 VLS_TYPE says whether the statement is a load or store and VECTYPE
1661 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1662 says how the load or store is going to be implemented and GROUP_SIZE
1663 is the number of load or store statements in the containing group.
1664 If the access is a gather load or scatter store, GS_INFO describes
1665 its arguments. If the load or store is conditional, SCALAR_MASK is the
1666 condition under which it occurs.
1667
1668 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1669 supported, otherwise record the required mask types. */
1670
1671 static void
1672 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type, int group_size,
1674 vect_memory_access_type memory_access_type,
1675 gather_scatter_info *gs_info, tree scalar_mask)
1676 {
1677 /* Invariant loads need no special support. */
1678 if (memory_access_type == VMAT_INVARIANT)
1679 return;
1680
1681 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1682 machine_mode vecmode = TYPE_MODE (vectype);
1683 bool is_load = (vls_type == VLS_LOAD);
1684 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1685 {
1686 if (is_load
1687 ? !vect_load_lanes_supported (vectype, group_size, true)
1688 : !vect_store_lanes_supported (vectype, group_size, true))
1689 {
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't use a fully-masked loop because the"
1693 " target doesn't have an appropriate masked"
1694 " load/store-lanes instruction.\n");
1695 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1696 return;
1697 }
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1701 }
1702
1703 if (memory_access_type == VMAT_GATHER_SCATTER)
1704 {
1705 internal_fn ifn = (is_load
1706 ? IFN_MASK_GATHER_LOAD
1707 : IFN_MASK_SCATTER_STORE);
1708 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1709 gs_info->memory_type,
1710 gs_info->offset_vectype,
1711 gs_info->scale))
1712 {
1713 if (dump_enabled_p ())
1714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1715 "can't use a fully-masked loop because the"
1716 " target doesn't have an appropriate masked"
1717 " gather load or scatter store instruction.\n");
1718 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1719 return;
1720 }
1721 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1722 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1723 return;
1724 }
1725
1726 if (memory_access_type != VMAT_CONTIGUOUS
1727 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1728 {
1729 /* Element X of the data must come from iteration i * VF + X of the
1730 scalar loop. We need more work to support other mappings. */
1731 if (dump_enabled_p ())
1732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1733 "can't use a fully-masked loop because an access"
1734 " isn't contiguous.\n");
1735 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1736 return;
1737 }
1738
1739 machine_mode mask_mode;
1740 if (!VECTOR_MODE_P (vecmode)
1741 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1743 {
1744 if (dump_enabled_p ())
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1746 "can't use a fully-masked loop because the target"
1747 " doesn't have the appropriate masked load or"
1748 " store.\n");
1749 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1750 return;
1751 }
1752 /* We might load more scalars than we need for permuting SLP loads.
1753 We checked in get_group_load_store_type that the extra elements
1754 don't leak into a new vector. */
1755 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1757 unsigned int nvectors;
1758 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1759 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1760 else
1761 gcc_unreachable ();
1762 }
1763
1764 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1765 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1766 that needs to be applied to all loads and stores in a vectorized loop.
1767 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1768
1769 MASK_TYPE is the type of both masks. If new statements are needed,
1770 insert them before GSI. */
1771
1772 static tree
1773 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1774 gimple_stmt_iterator *gsi)
1775 {
1776 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1777 if (!loop_mask)
1778 return vec_mask;
1779
1780 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1781 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1782 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1783 vec_mask, loop_mask);
1784 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1785 return and_res;
1786 }
1787
1788 /* Determine whether we can use a gather load or scatter store to vectorize
1789 strided load or store STMT_INFO by truncating the current offset to a
1790 smaller width. We need to be able to construct an offset vector:
1791
1792 { 0, X, X*2, X*3, ... }
1793
1794 without loss of precision, where X is STMT_INFO's DR_STEP.
1795
1796 Return true if this is possible, describing the gather load or scatter
1797 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
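/* As an illustration (numbers invented for the example): if DR_STEP is 32,
   the element size is 8 bytes and the loop is known to iterate at most 200
   times, then with SCALE 8 the offset vector is { 0, 4, 8, ... } with a
   maximum value of about 800, so a 16-bit offset type is the narrowest
   candidate; the truncation succeeds only if the target supports a gather
   or scatter taking offsets of at least that width.  */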
1798
1799 static bool
1800 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1801 loop_vec_info loop_vinfo, bool masked_p,
1802 gather_scatter_info *gs_info)
1803 {
1804 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1805 data_reference *dr = dr_info->dr;
1806 tree step = DR_STEP (dr);
1807 if (TREE_CODE (step) != INTEGER_CST)
1808 {
1809 /* ??? Perhaps we could use range information here? */
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_NOTE, vect_location,
1812 "cannot truncate variable step.\n");
1813 return false;
1814 }
1815
1816 /* Get the number of bits in an element. */
1817 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1818 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1819 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1820
1821 /* Set COUNT to the upper limit on the number of elements - 1.
1822 Start with the maximum vectorization factor. */
1823 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1824
1825 /* Try lowering COUNT to the number of scalar latch iterations. */
1826 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1827 widest_int max_iters;
1828 if (max_loop_iterations (loop, &max_iters)
1829 && max_iters < count)
1830 count = max_iters.to_shwi ();
1831
1832 /* Try scales of 1 and the element size. */
1833 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1834 wi::overflow_type overflow = wi::OVF_NONE;
1835 for (int i = 0; i < 2; ++i)
1836 {
1837 int scale = scales[i];
1838 widest_int factor;
1839 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1840 continue;
1841
1842 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1843 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1844 if (overflow)
1845 continue;
1846 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1847 unsigned int min_offset_bits = wi::min_precision (range, sign);
1848
1849 /* Find the narrowest viable offset type. */
1850 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1851 tree offset_type = build_nonstandard_integer_type (offset_bits,
1852 sign == UNSIGNED);
1853
1854 /* See whether the target supports the operation with an offset
1855 no narrower than OFFSET_TYPE. */
1856 tree memory_type = TREE_TYPE (DR_REF (dr));
1857 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1858 vectype, memory_type, offset_type, scale,
1859 &gs_info->ifn, &gs_info->offset_vectype))
1860 continue;
1861
1862 gs_info->decl = NULL_TREE;
1863 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1864 but we don't need to store that here. */
1865 gs_info->base = NULL_TREE;
1866 gs_info->element_type = TREE_TYPE (vectype);
1867 gs_info->offset = fold_convert (offset_type, step);
1868 gs_info->offset_dt = vect_constant_def;
1869 gs_info->scale = scale;
1870 gs_info->memory_type = memory_type;
1871 return true;
1872 }
1873
1874 if (overflow && dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE, vect_location,
1876 "truncating gather/scatter offset to %d bits"
1877 " might change its value.\n", element_bits);
1878
1879 return false;
1880 }
1881
1882 /* Return true if we can use gather/scatter internal functions to
1883 vectorize STMT_INFO, which is a grouped or strided load or store.
1884 MASKED_P is true if load or store is conditional. When returning
1885 true, fill in GS_INFO with the information required to perform the
1886 operation. */
1887
1888 static bool
1889 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1890 loop_vec_info loop_vinfo, bool masked_p,
1891 gather_scatter_info *gs_info)
1892 {
1893 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1894 || gs_info->decl)
1895 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1896 masked_p, gs_info);
1897
1898 tree old_offset_type = TREE_TYPE (gs_info->offset);
1899 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1900
1901 gcc_assert (TYPE_PRECISION (new_offset_type)
1902 >= TYPE_PRECISION (old_offset_type));
1903 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1904
1905 if (dump_enabled_p ())
1906 dump_printf_loc (MSG_NOTE, vect_location,
1907 "using gather/scatter for strided/grouped access,"
1908 " scale = %d\n", gs_info->scale);
1909
1910 return true;
1911 }
1912
1913 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1914 elements with a known constant step. Return -1 if that step
1915 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1916
1917 static int
1918 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1919 {
1920 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1921 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1922 size_zero_node);
1923 }
1924
1925 /* If the target supports a permute mask that reverses the elements in
1926 a vector of type VECTYPE, return that mask, otherwise return null. */
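/* For example, for V8HI the required permutation is { 7, 6, 5, 4, 3, 2, 1, 0 },
   which the builder below encodes as the single stepped pattern { 7, 6, 5, ... }.  */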
1927
1928 static tree
1929 perm_mask_for_reverse (tree vectype)
1930 {
1931 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1932
1933 /* The encoding has a single stepped pattern. */
1934 vec_perm_builder sel (nunits, 1, 3);
1935 for (int i = 0; i < 3; ++i)
1936 sel.quick_push (nunits - 1 - i);
1937
1938 vec_perm_indices indices (sel, 1, nunits);
1939 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1940 return NULL_TREE;
1941 return vect_gen_perm_mask_checked (vectype, indices);
1942 }
1943
1944 /* A subroutine of get_load_store_type, with a subset of the same
1945 arguments. Handle the case where STMT_INFO is a load or store that
1946 accesses consecutive elements with a negative step. */
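/* Such accesses typically come from loops like
     for (i = n - 1; i >= 0; --i) ... = a[i];
   where each vector still covers consecutive addresses but the elements
   must be reversed (or the vector stored "downwards") to preserve the
   scalar order.  */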
1947
1948 static vect_memory_access_type
1949 get_negative_load_store_type (vec_info *vinfo,
1950 stmt_vec_info stmt_info, tree vectype,
1951 vec_load_store_type vls_type,
1952 unsigned int ncopies)
1953 {
1954 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1955 dr_alignment_support alignment_support_scheme;
1956
1957 if (ncopies > 1)
1958 {
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1961 "multiple types with negative step.\n");
1962 return VMAT_ELEMENTWISE;
1963 }
1964
1965 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1966 dr_info, false);
1967 if (alignment_support_scheme != dr_aligned
1968 && alignment_support_scheme != dr_unaligned_supported)
1969 {
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "negative step but alignment required.\n");
1973 return VMAT_ELEMENTWISE;
1974 }
1975
1976 if (vls_type == VLS_STORE_INVARIANT)
1977 {
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_NOTE, vect_location,
1980 "negative step with invariant source;"
1981 " no permute needed.\n");
1982 return VMAT_CONTIGUOUS_DOWN;
1983 }
1984
1985 if (!perm_mask_for_reverse (vectype))
1986 {
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1989 "negative step and reversing not supported.\n");
1990 return VMAT_ELEMENTWISE;
1991 }
1992
1993 return VMAT_CONTIGUOUS_REVERSE;
1994 }
1995
1996 /* STMT_INFO is either a masked or unconditional store. Return the value
1997 being stored. */
1998
1999 tree
2000 vect_get_store_rhs (stmt_vec_info stmt_info)
2001 {
2002 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2003 {
2004 gcc_assert (gimple_assign_single_p (assign));
2005 return gimple_assign_rhs1 (assign);
2006 }
2007 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2008 {
2009 internal_fn ifn = gimple_call_internal_fn (call);
2010 int index = internal_fn_stored_value_index (ifn);
2011 gcc_assert (index >= 0);
2012 return gimple_call_arg (call, index);
2013 }
2014 gcc_unreachable ();
2015 }
2016
2017 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2018
2019 This function returns a vector type which can be composed from NELTS pieces,
2020 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
2021 same vector size as the returned type. The function first checks whether the
2022 target supports constructing such a vector from pieces-sized vector modes;
2023 if not, it checks whether a pieces-sized integer mode can be used instead.
2024 It returns NULL_TREE if no suitable composition can be found.
2025
2026 For example, for (vtype=V16QI, nelts=4), we can probably get:
2027 - V16QI with PTYPE V4QI.
2028 - V4SI with PTYPE SI.
2029 - NULL_TREE. */
2030
2031 static tree
2032 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2033 {
2034 gcc_assert (VECTOR_TYPE_P (vtype));
2035 gcc_assert (known_gt (nelts, 0U));
2036
2037 machine_mode vmode = TYPE_MODE (vtype);
2038 if (!VECTOR_MODE_P (vmode))
2039 return NULL_TREE;
2040
2041 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2042 unsigned int pbsize;
2043 if (constant_multiple_p (vbsize, nelts, &pbsize))
2044 {
2045 /* First check if vec_init optab supports construction from
2046 vector pieces directly. */
2047 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2048 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2049 machine_mode rmode;
2050 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2051 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2052 != CODE_FOR_nothing))
2053 {
2054 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2055 return vtype;
2056 }
2057
2058 /* Otherwise check whether an integer mode of the same piece size exists
2059 and whether the vec_init optab supports construction from it directly. */
2060 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2061 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2062 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2063 != CODE_FOR_nothing))
2064 {
2065 *ptype = build_nonstandard_integer_type (pbsize, 1);
2066 return build_vector_type (*ptype, nelts);
2067 }
2068 }
2069
2070 return NULL_TREE;
2071 }
2072
2073 /* A subroutine of get_load_store_type, with a subset of the same
2074 arguments. Handle the case where STMT_INFO is part of a grouped load
2075 or store.
2076
2077 For stores, the statements in the group are all consecutive
2078 and there is no gap at the end. For loads, the statements in the
2079 group might not be consecutive; there can be gaps between statements
2080 as well as at the end. */
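/* For example, if a loop reads only a[4*i] and a[4*i+1] out of a structure
   of four elements, the two loads form a group of size 4 with a gap of 2 at
   the end; a store group, by contrast, must cover its elements with no gap.  */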
2081
2082 static bool
2083 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2084 tree vectype, bool slp,
2085 bool masked_p, vec_load_store_type vls_type,
2086 vect_memory_access_type *memory_access_type,
2087 gather_scatter_info *gs_info)
2088 {
2089 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2090 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2091 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2092 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2093 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2094 bool single_element_p = (stmt_info == first_stmt_info
2095 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2096 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2097 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2098
2099 /* True if the vectorized statements would access beyond the last
2100 statement in the group. */
2101 bool overrun_p = false;
2102
2103 /* True if we can cope with such overrun by peeling for gaps, so that
2104 there is at least one final scalar iteration after the vector loop. */
2105 bool can_overrun_p = (!masked_p
2106 && vls_type == VLS_LOAD
2107 && loop_vinfo
2108 && !loop->inner);
2109
2110 /* There can only be a gap at the end of the group if the stride is
2111 known at compile time. */
2112 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2113
2114 /* Stores can't yet have gaps. */
2115 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2116
2117 if (slp)
2118 {
2119 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2120 {
2121 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2122 separated by the stride, until we have a complete vector.
2123 Fall back to scalar accesses if that isn't possible. */
2124 if (multiple_p (nunits, group_size))
2125 *memory_access_type = VMAT_STRIDED_SLP;
2126 else
2127 *memory_access_type = VMAT_ELEMENTWISE;
2128 }
2129 else
2130 {
2131 overrun_p = loop_vinfo && gap != 0;
2132 if (overrun_p && vls_type != VLS_LOAD)
2133 {
2134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2135 "Grouped store with gaps requires"
2136 " non-consecutive accesses\n");
2137 return false;
2138 }
2139 /* An overrun is fine if the trailing elements are smaller
2140 than the alignment boundary B. Every vector access will
2141 be a multiple of B and so we are guaranteed to access a
2142 non-gap element in the same B-sized block. */
2143 if (overrun_p
2144 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2145 / vect_get_scalar_dr_size (first_dr_info)))
2146 overrun_p = false;
2147
2148 /* If the gap splits the vector in half and the target
2149 can do half-vector operations avoid the epilogue peeling
2150 by simply loading half of the vector only. Usually
2151 the construction with an upper zero half will be elided. */
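/* For example (illustrative): with vector type V8SI and a group of size 8
   of which only the first 4 elements are accessed (gap 4), it is enough to
   load the low V4SI half and build the V8SI vector from it.  */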
2152 dr_alignment_support alignment_support_scheme;
2153 tree half_vtype;
2154 if (overrun_p
2155 && !masked_p
2156 && (((alignment_support_scheme
2157 = vect_supportable_dr_alignment (vinfo,
2158 first_dr_info, false)))
2159 == dr_aligned
2160 || alignment_support_scheme == dr_unaligned_supported)
2161 && known_eq (nunits, (group_size - gap) * 2)
2162 && known_eq (nunits, group_size)
2163 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2164 != NULL_TREE))
2165 overrun_p = false;
2166
2167 if (overrun_p && !can_overrun_p)
2168 {
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2171 "Peeling for outer loop is not supported\n");
2172 return false;
2173 }
2174 int cmp = compare_step_with_zero (vinfo, stmt_info);
2175 if (cmp < 0)
2176 *memory_access_type = get_negative_load_store_type
2177 (vinfo, stmt_info, vectype, vls_type, 1);
2178 else
2179 {
2180 gcc_assert (!loop_vinfo || cmp > 0);
2181 *memory_access_type = VMAT_CONTIGUOUS;
2182 }
2183 }
2184 }
2185 else
2186 {
2187 /* We can always handle this case using elementwise accesses,
2188 but see if something more efficient is available. */
2189 *memory_access_type = VMAT_ELEMENTWISE;
2190
2191 /* If there is a gap at the end of the group then these optimizations
2192 would access excess elements in the last iteration. */
2193 bool would_overrun_p = (gap != 0);
2194 /* An overrun is fine if the trailing elements are smaller than the
2195 alignment boundary B. Every vector access will be a multiple of B
2196 and so we are guaranteed to access a non-gap element in the
2197 same B-sized block. */
2198 if (would_overrun_p
2199 && !masked_p
2200 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2201 / vect_get_scalar_dr_size (first_dr_info)))
2202 would_overrun_p = false;
2203
2204 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2205 && (can_overrun_p || !would_overrun_p)
2206 && compare_step_with_zero (vinfo, stmt_info) > 0)
2207 {
2208 /* First cope with the degenerate case of a single-element
2209 vector. */
2210 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2211 *memory_access_type = VMAT_CONTIGUOUS;
2212
2213 /* Otherwise try using LOAD/STORE_LANES. */
2214 if (*memory_access_type == VMAT_ELEMENTWISE
2215 && (vls_type == VLS_LOAD
2216 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2217 : vect_store_lanes_supported (vectype, group_size,
2218 masked_p)))
2219 {
2220 *memory_access_type = VMAT_LOAD_STORE_LANES;
2221 overrun_p = would_overrun_p;
2222 }
2223
2224 /* If that fails, try using permuting loads. */
2225 if (*memory_access_type == VMAT_ELEMENTWISE
2226 && (vls_type == VLS_LOAD
2227 ? vect_grouped_load_supported (vectype, single_element_p,
2228 group_size)
2229 : vect_grouped_store_supported (vectype, group_size)))
2230 {
2231 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2232 overrun_p = would_overrun_p;
2233 }
2234 }
2235
2236 /* As a last resort, try using a gather load or scatter store.
2237
2238 ??? Although the code can handle all group sizes correctly,
2239 it probably isn't a win to use separate strided accesses based
2240 on nearby locations. Or, even if it's a win over scalar code,
2241 it might not be a win over vectorizing at a lower VF, if that
2242 allows us to use contiguous accesses. */
2243 if (*memory_access_type == VMAT_ELEMENTWISE
2244 && single_element_p
2245 && loop_vinfo
2246 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2247 masked_p, gs_info))
2248 *memory_access_type = VMAT_GATHER_SCATTER;
2249 }
2250
2251 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2252 {
2253 /* STMT is the leader of the group. Check the operands of all the
2254 stmts of the group. */
2255 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2256 while (next_stmt_info)
2257 {
2258 tree op = vect_get_store_rhs (next_stmt_info);
2259 enum vect_def_type dt;
2260 if (!vect_is_simple_use (op, vinfo, &dt))
2261 {
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 "use not simple.\n");
2265 return false;
2266 }
2267 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2268 }
2269 }
2270
2271 if (overrun_p)
2272 {
2273 gcc_assert (can_overrun_p);
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "Data access with gaps requires scalar "
2277 "epilogue loop\n");
2278 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2279 }
2280
2281 return true;
2282 }
2283
2284 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2285 if there is a memory access type that the vectorized form can use,
2286 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2287 or scatters, fill in GS_INFO accordingly.
2288
2289 SLP says whether we're performing SLP rather than loop vectorization.
2290 MASKED_P is true if the statement is conditional on a vectorized mask.
2291 VECTYPE is the vector type that the vectorized statements will use.
2292 NCOPIES is the number of vector statements that will be needed. */
2293
2294 static bool
2295 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2296 tree vectype, bool slp,
2297 bool masked_p, vec_load_store_type vls_type,
2298 unsigned int ncopies,
2299 vect_memory_access_type *memory_access_type,
2300 gather_scatter_info *gs_info)
2301 {
2302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2305 {
2306 *memory_access_type = VMAT_GATHER_SCATTER;
2307 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2308 gcc_unreachable ();
2309 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2310 &gs_info->offset_dt,
2311 &gs_info->offset_vectype))
2312 {
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "%s index use not simple.\n",
2316 vls_type == VLS_LOAD ? "gather" : "scatter");
2317 return false;
2318 }
2319 }
2320 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2321 {
2322 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2323 vls_type, memory_access_type, gs_info))
2324 return false;
2325 }
2326 else if (STMT_VINFO_STRIDED_P (stmt_info))
2327 {
2328 gcc_assert (!slp);
2329 if (loop_vinfo
2330 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2331 masked_p, gs_info))
2332 *memory_access_type = VMAT_GATHER_SCATTER;
2333 else
2334 *memory_access_type = VMAT_ELEMENTWISE;
2335 }
2336 else
2337 {
2338 int cmp = compare_step_with_zero (vinfo, stmt_info);
2339 if (cmp < 0)
2340 *memory_access_type = get_negative_load_store_type
2341 (vinfo, stmt_info, vectype, vls_type, ncopies);
2342 else if (cmp == 0)
2343 {
2344 gcc_assert (vls_type == VLS_LOAD);
2345 *memory_access_type = VMAT_INVARIANT;
2346 }
2347 else
2348 *memory_access_type = VMAT_CONTIGUOUS;
2349 }
2350
2351 if ((*memory_access_type == VMAT_ELEMENTWISE
2352 || *memory_access_type == VMAT_STRIDED_SLP)
2353 && !nunits.is_constant ())
2354 {
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 "Not using elementwise accesses due to variable "
2358 "vectorization factor.\n");
2359 return false;
2360 }
2361
2362 /* FIXME: At the moment the cost model seems to underestimate the
2363 cost of using elementwise accesses. This check preserves the
2364 traditional behavior until that can be fixed. */
2365 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2366 if (!first_stmt_info)
2367 first_stmt_info = stmt_info;
2368 if (*memory_access_type == VMAT_ELEMENTWISE
2369 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2370 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2371 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2372 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "not falling back to elementwise accesses\n");
2377 return false;
2378 }
2379 return true;
2380 }
2381
2382 /* Return true if boolean argument MASK is suitable for vectorizing
2383 conditional operation STMT_INFO. When returning true, store the type
2384 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2385 in *MASK_VECTYPE_OUT. */
2386
2387 static bool
2388 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2389 vect_def_type *mask_dt_out,
2390 tree *mask_vectype_out)
2391 {
2392 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2393 {
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "mask argument is not a boolean.\n");
2397 return false;
2398 }
2399
2400 if (TREE_CODE (mask) != SSA_NAME)
2401 {
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "mask argument is not an SSA name.\n");
2405 return false;
2406 }
2407
2408 enum vect_def_type mask_dt;
2409 tree mask_vectype;
2410 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2411 {
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "mask use not simple.\n");
2415 return false;
2416 }
2417
2418 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2419 if (!mask_vectype)
2420 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2421
2422 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2423 {
2424 if (dump_enabled_p ())
2425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2426 "could not find an appropriate vector mask type.\n");
2427 return false;
2428 }
2429
2430 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2431 TYPE_VECTOR_SUBPARTS (vectype)))
2432 {
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "vector mask type %T"
2436 " does not match vector data type %T.\n",
2437 mask_vectype, vectype);
2438
2439 return false;
2440 }
2441
2442 *mask_dt_out = mask_dt;
2443 *mask_vectype_out = mask_vectype;
2444 return true;
2445 }
2446
2447 /* Return true if stored value RHS is suitable for vectorizing store
2448 statement STMT_INFO. When returning true, store the type of the
2449 definition in *RHS_DT_OUT, the type of the vectorized store value in
2450 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2451
2452 static bool
2453 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2454 slp_tree slp_node, tree rhs,
2455 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2456 vec_load_store_type *vls_type_out)
2457 {
2458 /* If this is a store from a constant, make sure
2459 native_encode_expr can handle it. */
2460 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "cannot encode constant as a byte sequence.\n");
2465 return false;
2466 }
2467
2468 enum vect_def_type rhs_dt;
2469 tree rhs_vectype;
2470 slp_tree slp_op;
2471 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2472 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2473 {
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "use not simple.\n");
2477 return false;
2478 }
2479
2480 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2481 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2482 {
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2485 "incompatible vector types.\n");
2486 return false;
2487 }
2488
2489 *rhs_dt_out = rhs_dt;
2490 *rhs_vectype_out = rhs_vectype;
2491 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2492 *vls_type_out = VLS_STORE_INVARIANT;
2493 else
2494 *vls_type_out = VLS_STORE;
2495 return true;
2496 }
2497
2498 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2499 Note that we support masks with floating-point type, in which case the
2500 floats are interpreted as a bitmask. */
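/* For a V4SF mask, for instance, this builds a vector of four floats whose
   bit patterns are all ones (NaNs when read as floating-point values).  */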
2501
2502 static tree
2503 vect_build_all_ones_mask (vec_info *vinfo,
2504 stmt_vec_info stmt_info, tree masktype)
2505 {
2506 if (TREE_CODE (masktype) == INTEGER_TYPE)
2507 return build_int_cst (masktype, -1);
2508 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2509 {
2510 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2511 mask = build_vector_from_val (masktype, mask);
2512 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2513 }
2514 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2515 {
2516 REAL_VALUE_TYPE r;
2517 long tmp[6];
2518 for (int j = 0; j < 6; ++j)
2519 tmp[j] = -1;
2520 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2521 tree mask = build_real (TREE_TYPE (masktype), r);
2522 mask = build_vector_from_val (masktype, mask);
2523 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2524 }
2525 gcc_unreachable ();
2526 }
2527
2528 /* Build an all-zero merge value of type VECTYPE while vectorizing
2529 STMT_INFO as a gather load. */
2530
2531 static tree
2532 vect_build_zero_merge_argument (vec_info *vinfo,
2533 stmt_vec_info stmt_info, tree vectype)
2534 {
2535 tree merge;
2536 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2537 merge = build_int_cst (TREE_TYPE (vectype), 0);
2538 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2539 {
2540 REAL_VALUE_TYPE r;
2541 long tmp[6];
2542 for (int j = 0; j < 6; ++j)
2543 tmp[j] = 0;
2544 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2545 merge = build_real (TREE_TYPE (vectype), r);
2546 }
2547 else
2548 gcc_unreachable ();
2549 merge = build_vector_from_val (vectype, merge);
2550 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2551 }
2552
2553 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2554 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2555 the gather load operation. If the load is conditional, MASK is the
2556 unvectorized condition and MASK_DT is its definition type, otherwise
2557 MASK is null. */
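/* Roughly speaking, each copy emitted below has the shape
     DEST = GS_INFO->DECL (MERGE, PTR, OFFSET, MASK, SCALE);
   with VIEW_CONVERT_EXPRs inserted whenever the builtin's argument or
   return types differ from the vector types used by the loop, and with
   extra permutes when the offset vector is wider or narrower than the
   data vector.  */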
2558
2559 static void
2560 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2561 gimple_stmt_iterator *gsi,
2562 gimple **vec_stmt,
2563 gather_scatter_info *gs_info,
2564 tree mask)
2565 {
2566 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2567 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2569 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2570 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2571 edge pe = loop_preheader_edge (loop);
2572 enum { NARROW, NONE, WIDEN } modifier;
2573 poly_uint64 gather_off_nunits
2574 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2575
2576 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2577 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2578 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2579 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2580 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2581 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2582 tree scaletype = TREE_VALUE (arglist);
2583 tree real_masktype = masktype;
2584 gcc_checking_assert (types_compatible_p (srctype, rettype)
2585 && (!mask
2586 || TREE_CODE (masktype) == INTEGER_TYPE
2587 || types_compatible_p (srctype, masktype)));
2588 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2589 masktype = truth_type_for (srctype);
2590
2591 tree mask_halftype = masktype;
2592 tree perm_mask = NULL_TREE;
2593 tree mask_perm_mask = NULL_TREE;
2594 if (known_eq (nunits, gather_off_nunits))
2595 modifier = NONE;
2596 else if (known_eq (nunits * 2, gather_off_nunits))
2597 {
2598 modifier = WIDEN;
2599
2600 /* Currently widening gathers and scatters are only supported for
2601 fixed-length vectors. */
2602 int count = gather_off_nunits.to_constant ();
2603 vec_perm_builder sel (count, count, 1);
2604 for (int i = 0; i < count; ++i)
2605 sel.quick_push (i | (count / 2));
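/* For count == 4 this builds the selector { 2, 3, 2, 3 }, so that the
   odd-numbered copies below operate on the high half of each offset
   vector.  */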
2606
2607 vec_perm_indices indices (sel, 1, count);
2608 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2609 indices);
2610 }
2611 else if (known_eq (nunits, gather_off_nunits * 2))
2612 {
2613 modifier = NARROW;
2614
2615 /* Currently narrowing gathers and scatters are only supported for
2616 fixed-length vectors. */
2617 int count = nunits.to_constant ();
2618 vec_perm_builder sel (count, count, 1);
2619 sel.quick_grow (count);
2620 for (int i = 0; i < count; ++i)
2621 sel[i] = i < count / 2 ? i : i + count / 2;
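/* For count == 4 this builds the two-vector selector { 0, 1, 4, 5 }, which
   concatenates the low halves of two partial gather results into one
   full-width vector.  */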
2622 vec_perm_indices indices (sel, 2, count);
2623 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2624
2625 ncopies *= 2;
2626
2627 if (mask && masktype == real_masktype)
2628 {
2629 for (int i = 0; i < count; ++i)
2630 sel[i] = i | (count / 2);
2631 indices.new_vector (sel, 2, count);
2632 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2633 }
2634 else if (mask)
2635 mask_halftype = truth_type_for (gs_info->offset_vectype);
2636 }
2637 else
2638 gcc_unreachable ();
2639
2640 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2641 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2642
2643 tree ptr = fold_convert (ptrtype, gs_info->base);
2644 if (!is_gimple_min_invariant (ptr))
2645 {
2646 gimple_seq seq;
2647 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2648 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2649 gcc_assert (!new_bb);
2650 }
2651
2652 tree scale = build_int_cst (scaletype, gs_info->scale);
2653
2654 tree vec_oprnd0 = NULL_TREE;
2655 tree vec_mask = NULL_TREE;
2656 tree src_op = NULL_TREE;
2657 tree mask_op = NULL_TREE;
2658 tree prev_res = NULL_TREE;
2659
2660 if (!mask)
2661 {
2662 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2663 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2664 }
2665
2666 auto_vec<tree> vec_oprnds0;
2667 auto_vec<tree> vec_masks;
2668 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2669 modifier == WIDEN ? ncopies / 2 : ncopies,
2670 gs_info->offset, &vec_oprnds0);
2671 if (mask)
2672 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2673 modifier == NARROW ? ncopies / 2 : ncopies,
2674 mask, &vec_masks);
2675 for (int j = 0; j < ncopies; ++j)
2676 {
2677 tree op, var;
2678 if (modifier == WIDEN && (j & 1))
2679 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2680 perm_mask, stmt_info, gsi);
2681 else
2682 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2683
2684 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2685 {
2686 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2687 TYPE_VECTOR_SUBPARTS (idxtype)));
2688 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2689 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2690 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2691 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2692 op = var;
2693 }
2694
2695 if (mask)
2696 {
2697 if (mask_perm_mask && (j & 1))
2698 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2699 mask_perm_mask, stmt_info, gsi);
2700 else
2701 {
2702 if (modifier == NARROW)
2703 {
2704 if ((j & 1) == 0)
2705 vec_mask = vec_masks[j / 2];
2706 }
2707 else
2708 vec_mask = vec_masks[j];
2709
2710 mask_op = vec_mask;
2711 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2712 {
2713 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2714 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2715 gcc_assert (known_eq (sub1, sub2));
2716 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2717 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2718 gassign *new_stmt
2719 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2721 mask_op = var;
2722 }
2723 }
2724 if (modifier == NARROW && masktype != real_masktype)
2725 {
2726 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2727 gassign *new_stmt
2728 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2729 : VEC_UNPACK_LO_EXPR,
2730 mask_op);
2731 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2732 mask_op = var;
2733 }
2734 src_op = mask_op;
2735 }
2736
2737 tree mask_arg = mask_op;
2738 if (masktype != real_masktype)
2739 {
2740 tree utype, optype = TREE_TYPE (mask_op);
2741 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2742 utype = real_masktype;
2743 else
2744 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2745 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2746 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2747 gassign *new_stmt
2748 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_arg = var;
2751 if (!useless_type_conversion_p (real_masktype, utype))
2752 {
2753 gcc_assert (TYPE_PRECISION (utype)
2754 <= TYPE_PRECISION (real_masktype));
2755 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2756 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2757 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2758 mask_arg = var;
2759 }
2760 src_op = build_zero_cst (srctype);
2761 }
2762 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2763 mask_arg, scale);
2764
2765 if (!useless_type_conversion_p (vectype, rettype))
2766 {
2767 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2768 TYPE_VECTOR_SUBPARTS (rettype)));
2769 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2770 gimple_call_set_lhs (new_stmt, op);
2771 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2772 var = make_ssa_name (vec_dest);
2773 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2774 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2775 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2776 }
2777 else
2778 {
2779 var = make_ssa_name (vec_dest, new_stmt);
2780 gimple_call_set_lhs (new_stmt, var);
2781 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2782 }
2783
2784 if (modifier == NARROW)
2785 {
2786 if ((j & 1) == 0)
2787 {
2788 prev_res = var;
2789 continue;
2790 }
2791 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2792 stmt_info, gsi);
2793 new_stmt = SSA_NAME_DEF_STMT (var);
2794 }
2795
2796 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2797 }
2798 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2799 }
2800
2801 /* Prepare the base and offset in GS_INFO for vectorization.
2802 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2803 to the vectorized offset argument for the first copy of STMT_INFO.
2804 STMT_INFO is the statement described by GS_INFO and LOOP is the
2805 containing loop. */
2806
2807 static void
2808 vect_get_gather_scatter_ops (vec_info *vinfo,
2809 class loop *loop, stmt_vec_info stmt_info,
2810 gather_scatter_info *gs_info,
2811 tree *dataref_ptr, vec<tree> *vec_offset,
2812 unsigned ncopies)
2813 {
2814 gimple_seq stmts = NULL;
2815 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2816 if (stmts != NULL)
2817 {
2818 basic_block new_bb;
2819 edge pe = loop_preheader_edge (loop);
2820 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2821 gcc_assert (!new_bb);
2822 }
2823 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2824 vec_offset, gs_info->offset_vectype);
2825 }
2826
2827 /* Prepare to implement a grouped or strided load or store using
2828 the gather load or scatter store operation described by GS_INFO.
2829 STMT_INFO is the load or store statement.
2830
2831 Set *DATAREF_BUMP to the amount that should be added to the base
2832 address after each copy of the vectorized statement. Set *VEC_OFFSET
2833 to an invariant offset vector in which element I has the value
2834 I * DR_STEP / SCALE. */
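/* As a worked example with made-up numbers: for DR_STEP 12, SCALE 4 and a
   4-element vector type, *VEC_OFFSET becomes { 0, 3, 6, 9 } and
   *DATAREF_BUMP is 12 * 4 = 48 bytes per copy of the vectorized statement.  */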
2835
2836 static void
2837 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2838 loop_vec_info loop_vinfo,
2839 gather_scatter_info *gs_info,
2840 tree *dataref_bump, tree *vec_offset)
2841 {
2842 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2843 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2845 gimple_seq stmts;
2846
2847 tree bump = size_binop (MULT_EXPR,
2848 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2849 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2850 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2851 if (stmts)
2852 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2853
2854 /* The offset given in GS_INFO can have pointer type, so use the element
2855 type of the vector instead. */
2856 tree offset_type = TREE_TYPE (gs_info->offset);
2857 offset_type = TREE_TYPE (gs_info->offset_vectype);
2858
2859 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2860 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2861 ssize_int (gs_info->scale));
2862 step = fold_convert (offset_type, step);
2863 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2864
2865 /* Create {0, X, X*2, X*3, ...}. */
2866 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
2867 build_zero_cst (offset_type), step);
2868 if (stmts)
2869 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2870 }
2871
2872 /* Return the amount that should be added to a vector pointer to move
2873 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2874 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2875 vectorization. */
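/* For a contiguous access this is simply sizeof (AGGR_TYPE), negated when
   the step is negative; invariant accesses need no increment at all.  */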
2876
2877 static tree
2878 vect_get_data_ptr_increment (vec_info *vinfo,
2879 dr_vec_info *dr_info, tree aggr_type,
2880 vect_memory_access_type memory_access_type)
2881 {
2882 if (memory_access_type == VMAT_INVARIANT)
2883 return size_zero_node;
2884
2885 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2886 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2887 if (tree_int_cst_sgn (step) == -1)
2888 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2889 return iv_step;
2890 }
2891
2892 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2893
2894 static bool
2895 vectorizable_bswap (vec_info *vinfo,
2896 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2897 gimple **vec_stmt, slp_tree slp_node,
2898 slp_tree *slp_op,
2899 tree vectype_in, stmt_vector_for_cost *cost_vec)
2900 {
2901 tree op, vectype;
2902 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2903 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2904 unsigned ncopies;
2905
2906 op = gimple_call_arg (stmt, 0);
2907 vectype = STMT_VINFO_VECTYPE (stmt_info);
2908 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2909
2910 /* Multiple types in SLP are handled by creating the appropriate number of
2911 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2912 case of SLP. */
2913 if (slp_node)
2914 ncopies = 1;
2915 else
2916 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2917
2918 gcc_assert (ncopies >= 1);
2919
2920 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2921 if (! char_vectype)
2922 return false;
2923
2924 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2925 unsigned word_bytes;
2926 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2927 return false;
2928
2929 /* The encoding uses one stepped pattern for each byte in the word. */
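/* E.g. for a 32-bit bswap on V16QI data (word_bytes == 4) the selector
   expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */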
2930 vec_perm_builder elts (num_bytes, word_bytes, 3);
2931 for (unsigned i = 0; i < 3; ++i)
2932 for (unsigned j = 0; j < word_bytes; ++j)
2933 elts.quick_push ((i + 1) * word_bytes - j - 1);
2934
2935 vec_perm_indices indices (elts, 1, num_bytes);
2936 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2937 return false;
2938
2939 if (! vec_stmt)
2940 {
2941 if (slp_node
2942 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2943 {
2944 if (dump_enabled_p ())
2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2946 "incompatible vector types for invariants\n");
2947 return false;
2948 }
2949
2950 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2951 DUMP_VECT_SCOPE ("vectorizable_bswap");
2952 if (! slp_node)
2953 {
2954 record_stmt_cost (cost_vec,
2955 1, vector_stmt, stmt_info, 0, vect_prologue);
2956 record_stmt_cost (cost_vec,
2957 ncopies, vec_perm, stmt_info, 0, vect_body);
2958 }
2959 return true;
2960 }
2961
2962 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2963
2964 /* Transform. */
2965 vec<tree> vec_oprnds = vNULL;
2966 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
2967 op, &vec_oprnds);
2968 /* Arguments are ready. Create the new vector stmt. */
2969 unsigned i;
2970 tree vop;
2971 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2972 {
2973 gimple *new_stmt;
2974 tree tem = make_ssa_name (char_vectype);
2975 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2976 char_vectype, vop));
2977 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2978 tree tem2 = make_ssa_name (char_vectype);
2979 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2980 tem, tem, bswap_vconst);
2981 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2982 tem = make_ssa_name (vectype);
2983 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2984 vectype, tem2));
2985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2986 if (slp_node)
2987 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2988 else
2989 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2990 }
2991
2992 if (!slp_node)
2993 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2994
2995 vec_oprnds.release ();
2996 return true;
2997 }
2998
2999 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3000 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3001 in a single step. On success, store the binary pack code in
3002 *CONVERT_CODE. */
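/* For example, with VECTYPE_OUT V4SI and VECTYPE_IN V2DI the expected
   result is VEC_PACK_TRUNC_EXPR, which packs two V2DI vectors into a
   single V4SI vector in one step, provided the target supports it.  */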
3003
3004 static bool
3005 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3006 tree_code *convert_code)
3007 {
3008 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3009 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3010 return false;
3011
3012 tree_code code;
3013 int multi_step_cvt = 0;
3014 auto_vec <tree, 8> interm_types;
3015 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3016 &code, &multi_step_cvt, &interm_types)
3017 || multi_step_cvt)
3018 return false;
3019
3020 *convert_code = code;
3021 return true;
3022 }
3023
3024 /* Function vectorizable_call.
3025
3026 Check if STMT_INFO performs a function call that can be vectorized.
3027 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3028 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3029 Return true if STMT_INFO is vectorizable in this way. */
3030
3031 static bool
3032 vectorizable_call (vec_info *vinfo,
3033 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3034 gimple **vec_stmt, slp_tree slp_node,
3035 stmt_vector_for_cost *cost_vec)
3036 {
3037 gcall *stmt;
3038 tree vec_dest;
3039 tree scalar_dest;
3040 tree op;
3041 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3042 tree vectype_out, vectype_in;
3043 poly_uint64 nunits_in;
3044 poly_uint64 nunits_out;
3045 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3046 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3047 tree fndecl, new_temp, rhs_type;
3048 enum vect_def_type dt[4]
3049 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3050 vect_unknown_def_type };
3051 tree vectypes[ARRAY_SIZE (dt)] = {};
3052 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3053 int ndts = ARRAY_SIZE (dt);
3054 int ncopies, j;
3055 auto_vec<tree, 8> vargs;
3056 auto_vec<tree, 8> orig_vargs;
3057 enum { NARROW, NONE, WIDEN } modifier;
3058 size_t i, nargs;
3059 tree lhs;
3060
3061 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3062 return false;
3063
3064 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3065 && ! vec_stmt)
3066 return false;
3067
3068 /* Is STMT_INFO a vectorizable call? */
3069 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3070 if (!stmt)
3071 return false;
3072
3073 if (gimple_call_internal_p (stmt)
3074 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3075 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3076 /* Handled by vectorizable_load and vectorizable_store. */
3077 return false;
3078
3079 if (gimple_call_lhs (stmt) == NULL_TREE
3080 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3081 return false;
3082
3083 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3084
3085 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3086
3087 /* Process function arguments. */
3088 rhs_type = NULL_TREE;
3089 vectype_in = NULL_TREE;
3090 nargs = gimple_call_num_args (stmt);
3091
3092 /* Bail out if the function has more than four arguments; we do not have
3093 interesting builtin functions to vectorize with more than two arguments
3094 except for fma. Calls with no arguments are not interesting either. */
3095 if (nargs == 0 || nargs > 4)
3096 return false;
3097
3098 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3099 combined_fn cfn = gimple_call_combined_fn (stmt);
3100 if (cfn == CFN_GOMP_SIMD_LANE)
3101 {
3102 nargs = 0;
3103 rhs_type = unsigned_type_node;
3104 }
3105
3106 int mask_opno = -1;
3107 if (internal_fn_p (cfn))
3108 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3109
3110 for (i = 0; i < nargs; i++)
3111 {
3112 if ((int) i == mask_opno)
3113 {
3114 op = gimple_call_arg (stmt, i);
3115 if (!vect_check_scalar_mask (vinfo,
3116 stmt_info, op, &dt[i], &vectypes[i]))
3117 return false;
3118 continue;
3119 }
3120
3121 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3122 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3123 {
3124 if (dump_enabled_p ())
3125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3126 "use not simple.\n");
3127 return false;
3128 }
3129
3130 /* We can only handle calls with arguments of the same type. */
3131 if (rhs_type
3132 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3133 {
3134 if (dump_enabled_p ())
3135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3136 "argument types differ.\n");
3137 return false;
3138 }
3139 if (!rhs_type)
3140 rhs_type = TREE_TYPE (op);
3141
3142 if (!vectype_in)
3143 vectype_in = vectypes[i];
3144 else if (vectypes[i]
3145 && !types_compatible_p (vectypes[i], vectype_in))
3146 {
3147 if (dump_enabled_p ())
3148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3149 "argument vector types differ.\n");
3150 return false;
3151 }
3152 }
3153 /* If all arguments are external or constant defs, infer the vector type
3154 from the scalar type. */
3155 if (!vectype_in)
3156 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3157 if (vec_stmt)
3158 gcc_assert (vectype_in);
3159 if (!vectype_in)
3160 {
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "no vectype for scalar type %T\n", rhs_type);
3164
3165 return false;
3166 }
3167 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3168 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3169 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3170 by a pack of the two vectors into an SI vector. We would need
3171 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3172 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3173 {
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "mismatched vector sizes %T and %T\n",
3177 vectype_in, vectype_out);
3178 return false;
3179 }
3180
3181 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3182 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3183 {
3184 if (dump_enabled_p ())
3185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3186 "mixed mask and nonmask vector types\n");
3187 return false;
3188 }
3189
3190 /* FORNOW */
3191 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3192 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3193 if (known_eq (nunits_in * 2, nunits_out))
3194 modifier = NARROW;
3195 else if (known_eq (nunits_out, nunits_in))
3196 modifier = NONE;
3197 else if (known_eq (nunits_out * 2, nunits_in))
3198 modifier = WIDEN;
3199 else
3200 return false;
3201
3202 /* We only handle functions that do not read or clobber memory. */
3203 if (gimple_vuse (stmt))
3204 {
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "function reads from or writes to memory.\n");
3208 return false;
3209 }
3210
3211 /* For now, we only vectorize functions if a target specific builtin
3212 is available. TODO -- in some cases, it might be profitable to
3213 insert the calls for pieces of the vector, in order to be able
3214 to vectorize other operations in the loop. */
3215 fndecl = NULL_TREE;
3216 internal_fn ifn = IFN_LAST;
3217 tree callee = gimple_call_fndecl (stmt);
3218
3219 /* First try using an internal function. */
3220 tree_code convert_code = ERROR_MARK;
3221 if (cfn != CFN_LAST
3222 && (modifier == NONE
3223 || (modifier == NARROW
3224 && simple_integer_narrowing (vectype_out, vectype_in,
3225 &convert_code))))
3226 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3227 vectype_in);
3228
3229 /* If that fails, try asking for a target-specific built-in function. */
3230 if (ifn == IFN_LAST)
3231 {
3232 if (cfn != CFN_LAST)
3233 fndecl = targetm.vectorize.builtin_vectorized_function
3234 (cfn, vectype_out, vectype_in);
3235 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3236 fndecl = targetm.vectorize.builtin_md_vectorized_function
3237 (callee, vectype_out, vectype_in);
3238 }
3239
3240 if (ifn == IFN_LAST && !fndecl)
3241 {
3242 if (cfn == CFN_GOMP_SIMD_LANE
3243 && !slp_node
3244 && loop_vinfo
3245 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3246 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3247 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3248 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3249 {
3250 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3251 { 0, 1, 2, ... vf - 1 } vector. */
3252 gcc_assert (nargs == 0);
3253 }
3254 else if (modifier == NONE
3255 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3256 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3257 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3258 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3259 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3260 slp_op, vectype_in, cost_vec);
3261 else
3262 {
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3265 "function is not vectorizable.\n");
3266 return false;
3267 }
3268 }
3269
3270 if (slp_node)
3271 ncopies = 1;
3272 else if (modifier == NARROW && ifn == IFN_LAST)
3273 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3274 else
3275 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3276
3277 /* Sanity check: make sure that at least one copy of the vectorized stmt
3278 needs to be generated. */
3279 gcc_assert (ncopies >= 1);
3280
3281 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3282 if (!vec_stmt) /* transformation not required. */
3283 {
3284 if (slp_node)
3285 for (i = 0; i < nargs; ++i)
3286 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3287 {
3288 if (dump_enabled_p ())
3289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3290 "incompatible vector types for invariants\n");
3291 return false;
3292 }
3293 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3294 DUMP_VECT_SCOPE ("vectorizable_call");
3295 vect_model_simple_cost (vinfo, stmt_info,
3296 ncopies, dt, ndts, slp_node, cost_vec);
3297 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3298 record_stmt_cost (cost_vec, ncopies / 2,
3299 vec_promote_demote, stmt_info, 0, vect_body);
3300
3301 if (loop_vinfo && mask_opno >= 0)
3302 {
3303 unsigned int nvectors = (slp_node
3304 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3305 : ncopies);
3306 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3307 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3308 vectype_out, scalar_mask);
3309 }
3310 return true;
3311 }
3312
3313 /* Transform. */
3314
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3317
3318 /* Handle def. */
3319 scalar_dest = gimple_call_lhs (stmt);
3320 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3321
3322 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3323
3324 if (modifier == NONE || ifn != IFN_LAST)
3325 {
3326 tree prev_res = NULL_TREE;
3327 vargs.safe_grow (nargs);
3328 orig_vargs.safe_grow (nargs);
3329 auto_vec<vec<tree> > vec_defs (nargs);
3330 for (j = 0; j < ncopies; ++j)
3331 {
3332 /* Build argument list for the vectorized call. */
3333 if (slp_node)
3334 {
3335 vec<tree> vec_oprnds0;
3336
3337 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3338 vec_oprnds0 = vec_defs[0];
3339
3340 /* Arguments are ready. Create the new vector stmt. */
3341 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3342 {
3343 size_t k;
3344 for (k = 0; k < nargs; k++)
3345 {
3346 vec<tree> vec_oprndsk = vec_defs[k];
3347 vargs[k] = vec_oprndsk[i];
3348 }
3349 gimple *new_stmt;
3350 if (modifier == NARROW)
3351 {
3352 /* We don't define any narrowing conditional functions
3353 at present. */
3354 gcc_assert (mask_opno < 0);
3355 tree half_res = make_ssa_name (vectype_in);
3356 gcall *call
3357 = gimple_build_call_internal_vec (ifn, vargs);
3358 gimple_call_set_lhs (call, half_res);
3359 gimple_call_set_nothrow (call, true);
3360 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3361 if ((i & 1) == 0)
3362 {
3363 prev_res = half_res;
3364 continue;
3365 }
3366 new_temp = make_ssa_name (vec_dest);
3367 new_stmt = gimple_build_assign (new_temp, convert_code,
3368 prev_res, half_res);
3369 vect_finish_stmt_generation (vinfo, stmt_info,
3370 new_stmt, gsi);
3371 }
3372 else
3373 {
3374 if (mask_opno >= 0 && masked_loop_p)
3375 {
3376 unsigned int vec_num = vec_oprnds0.length ();
3377 /* Always true for SLP. */
3378 gcc_assert (ncopies == 1);
3379 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3380 vectype_out, i);
3381 vargs[mask_opno] = prepare_load_store_mask
3382 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3383 }
3384
3385 gcall *call;
3386 if (ifn != IFN_LAST)
3387 call = gimple_build_call_internal_vec (ifn, vargs);
3388 else
3389 call = gimple_build_call_vec (fndecl, vargs);
3390 new_temp = make_ssa_name (vec_dest, call);
3391 gimple_call_set_lhs (call, new_temp);
3392 gimple_call_set_nothrow (call, true);
3393 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3394 new_stmt = call;
3395 }
3396 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3397 }
3398 continue;
3399 }
3400
3401 for (i = 0; i < nargs; i++)
3402 {
3403 op = gimple_call_arg (stmt, i);
3404 if (j == 0)
3405 {
3406 vec_defs.quick_push (vNULL);
3407 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3408 op, &vec_defs[i]);
3409 }
3410 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3411 }
3412
3413 if (mask_opno >= 0 && masked_loop_p)
3414 {
3415 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3416 vectype_out, j);
3417 vargs[mask_opno]
3418 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3419 vargs[mask_opno], gsi);
3420 }
3421
3422 gimple *new_stmt;
3423 if (cfn == CFN_GOMP_SIMD_LANE)
3424 {
3425 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3426 tree new_var
3427 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3428 gimple *init_stmt = gimple_build_assign (new_var, cst);
3429 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3430 new_temp = make_ssa_name (vec_dest);
3431 new_stmt = gimple_build_assign (new_temp, new_var);
3432 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3433 }
3434 else if (modifier == NARROW)
3435 {
3436 /* We don't define any narrowing conditional functions at
3437 present. */
3438 gcc_assert (mask_opno < 0);
3439 tree half_res = make_ssa_name (vectype_in);
3440 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3441 gimple_call_set_lhs (call, half_res);
3442 gimple_call_set_nothrow (call, true);
3443 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
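/* Each copy computes only a VECTYPE_IN-sized half of the narrow result:
   even-numbered copies just remember their half in PREV_RES, and the
   following odd-numbered copy packs PREV_RES and HALF_RES into a single
   VECTYPE_OUT vector using CONVERT_CODE.  */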
3444 if ((j & 1) == 0)
3445 {
3446 prev_res = half_res;
3447 continue;
3448 }
3449 new_temp = make_ssa_name (vec_dest);
3450 new_stmt = gimple_build_assign (new_temp, convert_code,
3451 prev_res, half_res);
3452 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3453 }
3454 else
3455 {
3456 gcall *call;
3457 if (ifn != IFN_LAST)
3458 call = gimple_build_call_internal_vec (ifn, vargs);
3459 else
3460 call = gimple_build_call_vec (fndecl, vargs);
3461 new_temp = make_ssa_name (vec_dest, call);
3462 gimple_call_set_lhs (call, new_temp);
3463 gimple_call_set_nothrow (call, true);
3464 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3465 new_stmt = call;
3466 }
3467
3468 if (j == (modifier == NARROW ? 1 : 0))
3469 *vec_stmt = new_stmt;
3470 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3471 }
3472 for (i = 0; i < nargs; i++)
3473 {
3474 vec<tree> vec_oprndsi = vec_defs[i];
3475 vec_oprndsi.release ();
3476 }
3477 }
3478 else if (modifier == NARROW)
3479 {
3480 auto_vec<vec<tree> > vec_defs (nargs);
3481 /* We don't define any narrowing conditional functions at present. */
3482 gcc_assert (mask_opno < 0);
3483 for (j = 0; j < ncopies; ++j)
3484 {
3485 /* Build argument list for the vectorized call. */
3486 if (j == 0)
3487 vargs.create (nargs * 2);
3488 else
3489 vargs.truncate (0);
3490
3491 if (slp_node)
3492 {
3493 vec<tree> vec_oprnds0;
3494
3495 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3496 vec_oprnds0 = vec_defs[0];
3497
3498 /* Arguments are ready. Create the new vector stmt. */
3499 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3500 {
3501 size_t k;
3502 vargs.truncate (0);
3503 for (k = 0; k < nargs; k++)
3504 {
3505 vec<tree> vec_oprndsk = vec_defs[k];
3506 vargs.quick_push (vec_oprndsk[i]);
3507 vargs.quick_push (vec_oprndsk[i + 1]);
3508 }
3509 gcall *call;
3510 if (ifn != IFN_LAST)
3511 call = gimple_build_call_internal_vec (ifn, vargs);
3512 else
3513 call = gimple_build_call_vec (fndecl, vargs);
3514 new_temp = make_ssa_name (vec_dest, call);
3515 gimple_call_set_lhs (call, new_temp);
3516 gimple_call_set_nothrow (call, true);
3517 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3518 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3519 }
3520 continue;
3521 }
3522
3523 for (i = 0; i < nargs; i++)
3524 {
3525 op = gimple_call_arg (stmt, i);
3526 if (j == 0)
3527 {
3528 vec_defs.quick_push (vNULL);
3529 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3530 op, &vec_defs[i], vectypes[i]);
3531 }
3532 vec_oprnd0 = vec_defs[i][2*j];
3533 vec_oprnd1 = vec_defs[i][2*j+1];
3534
3535 vargs.quick_push (vec_oprnd0);
3536 vargs.quick_push (vec_oprnd1);
3537 }
3538
3539 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3540 new_temp = make_ssa_name (vec_dest, new_stmt);
3541 gimple_call_set_lhs (new_stmt, new_temp);
3542 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3543
3544 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3545 }
3546
3547 if (!slp_node)
3548 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3549
3550 for (i = 0; i < nargs; i++)
3551 {
3552 vec<tree> vec_oprndsi = vec_defs[i];
3553 vec_oprndsi.release ();
3554 }
3555 }
3556 else
3557 /* No current target implements this case. */
3558 return false;
3559
3560 vargs.release ();
3561
3562 /* The call in STMT might prevent it from being removed in DCE.
3563 However, we cannot remove it here, due to the way the SSA name
3564 it defines is mapped to the new definition. So just replace the
3565 rhs of the statement with something harmless. */
3566
3567 if (slp_node)
3568 return true;
3569
3570 stmt_info = vect_orig_stmt (stmt_info);
3571 lhs = gimple_get_lhs (stmt_info->stmt);
3572
3573 gassign *new_stmt
3574 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3575 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3576
3577 return true;
3578 }
3579
3580
3581 struct simd_call_arg_info
3582 {
3583 tree vectype;
3584 tree op;
3585 HOST_WIDE_INT linear_step;
3586 enum vect_def_type dt;
3587 unsigned int align;
3588 bool simd_lane_linear;
3589 };
3590
3591 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3592 is linear within a simd lane (but not within the whole loop), note it
3593 in *ARGINFO. */
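/* Descriptive note: the walk below peels PLUS, MULT and conversion
   statements off OP's definition chain; if it bottoms out at the
   IFN_GOMP_SIMD_LANE call for this loop's simduid, the accumulated base
   and multiplier are recorded as ARGINFO's op and linear_step.  */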
3594
3595 static void
3596 vect_simd_lane_linear (tree op, class loop *loop,
3597 struct simd_call_arg_info *arginfo)
3598 {
3599 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3600
3601 if (!is_gimple_assign (def_stmt)
3602 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3603 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3604 return;
3605
3606 tree base = gimple_assign_rhs1 (def_stmt);
3607 HOST_WIDE_INT linear_step = 0;
3608 tree v = gimple_assign_rhs2 (def_stmt);
3609 while (TREE_CODE (v) == SSA_NAME)
3610 {
3611 tree t;
3612 def_stmt = SSA_NAME_DEF_STMT (v);
3613 if (is_gimple_assign (def_stmt))
3614 switch (gimple_assign_rhs_code (def_stmt))
3615 {
3616 case PLUS_EXPR:
3617 t = gimple_assign_rhs2 (def_stmt);
3618 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3619 return;
3620 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3621 v = gimple_assign_rhs1 (def_stmt);
3622 continue;
3623 case MULT_EXPR:
3624 t = gimple_assign_rhs2 (def_stmt);
3625 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3626 return;
3627 linear_step = tree_to_shwi (t);
3628 v = gimple_assign_rhs1 (def_stmt);
3629 continue;
3630 CASE_CONVERT:
3631 t = gimple_assign_rhs1 (def_stmt);
3632 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3633 || (TYPE_PRECISION (TREE_TYPE (v))
3634 < TYPE_PRECISION (TREE_TYPE (t))))
3635 return;
3636 if (!linear_step)
3637 linear_step = 1;
3638 v = t;
3639 continue;
3640 default:
3641 return;
3642 }
3643 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3644 && loop->simduid
3645 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3646 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3647 == loop->simduid))
3648 {
3649 if (!linear_step)
3650 linear_step = 1;
3651 arginfo->linear_step = linear_step;
3652 arginfo->op = base;
3653 arginfo->simd_lane_linear = true;
3654 return;
3655 }
3656 }
3657 }
3658
3659 /* Return the number of elements in vector type VECTYPE, which is associated
3660 with a SIMD clone. At present these vectors always have a constant
3661 length. */
3662
3663 static unsigned HOST_WIDE_INT
3664 simd_clone_subparts (tree vectype)
3665 {
3666 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3667 }
3668
3669 /* Function vectorizable_simd_clone_call.
3670
3671 Check if STMT_INFO performs a function call that can be vectorized
3672 by calling a simd clone of the function.
3673 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3674 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3675 Return true if STMT_INFO is vectorizable in this way. */
3676
3677 static bool
3678 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3679 gimple_stmt_iterator *gsi,
3680 gimple **vec_stmt, slp_tree slp_node,
3681 stmt_vector_for_cost *)
3682 {
3683 tree vec_dest;
3684 tree scalar_dest;
3685 tree op, type;
3686 tree vec_oprnd0 = NULL_TREE;
3687 tree vectype;
3688 unsigned int nunits;
3689 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3690 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3691 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3692 tree fndecl, new_temp;
3693 int ncopies, j;
3694 auto_vec<simd_call_arg_info> arginfo;
3695 vec<tree> vargs = vNULL;
3696 size_t i, nargs;
3697 tree lhs, rtype, ratype;
3698 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3699
3700 /* Is STMT a vectorizable call? */
3701 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3702 if (!stmt)
3703 return false;
3704
3705 fndecl = gimple_call_fndecl (stmt);
3706 if (fndecl == NULL_TREE)
3707 return false;
3708
3709 struct cgraph_node *node = cgraph_node::get (fndecl);
3710 if (node == NULL || node->simd_clones == NULL)
3711 return false;
3712
3713 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3714 return false;
3715
3716 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3717 && ! vec_stmt)
3718 return false;
3719
3720 if (gimple_call_lhs (stmt)
3721 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3722 return false;
3723
3724 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3725
3726 vectype = STMT_VINFO_VECTYPE (stmt_info);
3727
3728 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3729 return false;
3730
3731 /* FORNOW */
3732 if (slp_node)
3733 return false;
3734
3735 /* Process function arguments. */
3736 nargs = gimple_call_num_args (stmt);
3737
3738 /* Bail out if the function has zero arguments. */
3739 if (nargs == 0)
3740 return false;
3741
3742 arginfo.reserve (nargs, true);
3743
3744 for (i = 0; i < nargs; i++)
3745 {
3746 simd_call_arg_info thisarginfo;
3747 affine_iv iv;
3748
3749 thisarginfo.linear_step = 0;
3750 thisarginfo.align = 0;
3751 thisarginfo.op = NULL_TREE;
3752 thisarginfo.simd_lane_linear = false;
3753
3754 op = gimple_call_arg (stmt, i);
3755 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3756 &thisarginfo.vectype)
3757 || thisarginfo.dt == vect_uninitialized_def)
3758 {
3759 if (dump_enabled_p ())
3760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3761 "use not simple.\n");
3762 return false;
3763 }
3764
3765 if (thisarginfo.dt == vect_constant_def
3766 || thisarginfo.dt == vect_external_def)
3767 gcc_assert (thisarginfo.vectype == NULL_TREE);
3768 else
3769 {
3770 gcc_assert (thisarginfo.vectype != NULL_TREE);
3771 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3772 {
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3775 "vector mask arguments are not supported\n");
3776 return false;
3777 }
3778 }
3779
3780 /* For linear arguments, the analyze phase should have saved
3781 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3782 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3783 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3784 {
3785 gcc_assert (vec_stmt);
3786 thisarginfo.linear_step
3787 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3788 thisarginfo.op
3789 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3790 thisarginfo.simd_lane_linear
3791 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3792 == boolean_true_node);
3793 /* If the loop has been peeled for alignment, we need to adjust it. */
3794 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3795 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3796 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3797 {
3798 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3799 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3800 tree opt = TREE_TYPE (thisarginfo.op);
3801 bias = fold_convert (TREE_TYPE (step), bias);
3802 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3803 thisarginfo.op
3804 = fold_build2 (POINTER_TYPE_P (opt)
3805 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3806 thisarginfo.op, bias);
3807 }
3808 }
3809 else if (!vec_stmt
3810 && thisarginfo.dt != vect_constant_def
3811 && thisarginfo.dt != vect_external_def
3812 && loop_vinfo
3813 && TREE_CODE (op) == SSA_NAME
3814 && simple_iv (loop, loop_containing_stmt (stmt), op,
3815 &iv, false)
3816 && tree_fits_shwi_p (iv.step))
3817 {
3818 thisarginfo.linear_step = tree_to_shwi (iv.step);
3819 thisarginfo.op = iv.base;
3820 }
3821 else if ((thisarginfo.dt == vect_constant_def
3822 || thisarginfo.dt == vect_external_def)
3823 && POINTER_TYPE_P (TREE_TYPE (op)))
3824 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3825 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3826 linear too. */
3827 if (POINTER_TYPE_P (TREE_TYPE (op))
3828 && !thisarginfo.linear_step
3829 && !vec_stmt
3830 && thisarginfo.dt != vect_constant_def
3831 && thisarginfo.dt != vect_external_def
3832 && loop_vinfo
3833 && !slp_node
3834 && TREE_CODE (op) == SSA_NAME)
3835 vect_simd_lane_linear (op, loop, &thisarginfo);
3836
3837 arginfo.quick_push (thisarginfo);
3838 }
3839
3840 unsigned HOST_WIDE_INT vf;
3841 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3842 {
3843 if (dump_enabled_p ())
3844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3845 "not considering SIMD clones; not yet supported"
3846 " for variable-width vectors.\n");
3847 return false;
3848 }
3849
3850 unsigned int badness = 0;
3851 struct cgraph_node *bestn = NULL;
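/* Pick the best simd clone for this call.  Each candidate accumulates a
   badness score (a smaller simdlen than VF and target-reported penalties
   add to it, while incompatible arguments and, for now, inbranch clones
   disqualify the candidate outright) and the lowest score wins.  For
   example, with vf == 8 a clone with simdlen == 4 starts with a penalty
   of (log2 (8) - log2 (4)) * 1024 == 1024.  */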
3852 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3853 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3854 else
3855 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3856 n = n->simdclone->next_clone)
3857 {
3858 unsigned int this_badness = 0;
3859 if (n->simdclone->simdlen > vf
3860 || n->simdclone->nargs != nargs)
3861 continue;
3862 if (n->simdclone->simdlen < vf)
3863 this_badness += (exact_log2 (vf)
3864 - exact_log2 (n->simdclone->simdlen)) * 1024;
3865 if (n->simdclone->inbranch)
3866 this_badness += 2048;
3867 int target_badness = targetm.simd_clone.usable (n);
3868 if (target_badness < 0)
3869 continue;
3870 this_badness += target_badness * 512;
3871 /* FORNOW: Have to add code to add the mask argument. */
3872 if (n->simdclone->inbranch)
3873 continue;
3874 for (i = 0; i < nargs; i++)
3875 {
3876 switch (n->simdclone->args[i].arg_type)
3877 {
3878 case SIMD_CLONE_ARG_TYPE_VECTOR:
3879 if (!useless_type_conversion_p
3880 (n->simdclone->args[i].orig_type,
3881 TREE_TYPE (gimple_call_arg (stmt, i))))
3882 i = -1;
3883 else if (arginfo[i].dt == vect_constant_def
3884 || arginfo[i].dt == vect_external_def
3885 || arginfo[i].linear_step)
3886 this_badness += 64;
3887 break;
3888 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3889 if (arginfo[i].dt != vect_constant_def
3890 && arginfo[i].dt != vect_external_def)
3891 i = -1;
3892 break;
3893 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3894 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3895 if (arginfo[i].dt == vect_constant_def
3896 || arginfo[i].dt == vect_external_def
3897 || (arginfo[i].linear_step
3898 != n->simdclone->args[i].linear_step))
3899 i = -1;
3900 break;
3901 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3902 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3903 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3904 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3905 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3906 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3907 /* FORNOW */
3908 i = -1;
3909 break;
3910 case SIMD_CLONE_ARG_TYPE_MASK:
3911 gcc_unreachable ();
3912 }
3913 if (i == (size_t) -1)
3914 break;
3915 if (n->simdclone->args[i].alignment > arginfo[i].align)
3916 {
3917 i = -1;
3918 break;
3919 }
3920 if (arginfo[i].align)
3921 this_badness += (exact_log2 (arginfo[i].align)
3922 - exact_log2 (n->simdclone->args[i].alignment));
3923 }
3924 if (i == (size_t) -1)
3925 continue;
3926 if (bestn == NULL || this_badness < badness)
3927 {
3928 bestn = n;
3929 badness = this_badness;
3930 }
3931 }
3932
3933 if (bestn == NULL)
3934 return false;
3935
3936 for (i = 0; i < nargs; i++)
3937 if ((arginfo[i].dt == vect_constant_def
3938 || arginfo[i].dt == vect_external_def)
3939 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3940 {
3941 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3942 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3943 slp_node);
3944 if (arginfo[i].vectype == NULL
3945 || (simd_clone_subparts (arginfo[i].vectype)
3946 > bestn->simdclone->simdlen))
3947 return false;
3948 }
3949
3950 fndecl = bestn->decl;
3951 nunits = bestn->simdclone->simdlen;
3952 ncopies = vf / nunits;
3953
3954 /* If the function isn't const, only allow it in simd loops where the
3955 user has asserted that at least nunits consecutive iterations can be
3956 performed using SIMD instructions. */
3957 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3958 && gimple_vuse (stmt))
3959 return false;
3960
3961 /* Sanity check: make sure that at least one copy of the vectorized stmt
3962 needs to be generated. */
3963 gcc_assert (ncopies >= 1);
3964
3965 if (!vec_stmt) /* transformation not required. */
3966 {
3967 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3968 for (i = 0; i < nargs; i++)
3969 if ((bestn->simdclone->args[i].arg_type
3970 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3971 || (bestn->simdclone->args[i].arg_type
3972 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3973 {
3974 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3975 + 1);
3976 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3977 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3978 ? size_type_node : TREE_TYPE (arginfo[i].op);
3979 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3980 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3981 tree sll = arginfo[i].simd_lane_linear
3982 ? boolean_true_node : boolean_false_node;
3983 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3984 }
3985 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3986 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
3987 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
3988 dt, slp_node, cost_vec); */
3989 return true;
3990 }
3991
3992 /* Transform. */
3993
3994 if (dump_enabled_p ())
3995 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3996
3997 /* Handle def. */
3998 scalar_dest = gimple_call_lhs (stmt);
3999 vec_dest = NULL_TREE;
4000 rtype = NULL_TREE;
4001 ratype = NULL_TREE;
4002 if (scalar_dest)
4003 {
4004 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4005 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4006 if (TREE_CODE (rtype) == ARRAY_TYPE)
4007 {
4008 ratype = rtype;
4009 rtype = TREE_TYPE (ratype);
4010 }
4011 }
4012
4013 auto_vec<vec<tree> > vec_oprnds;
4014 auto_vec<unsigned> vec_oprnds_i;
4015 vec_oprnds.safe_grow_cleared (nargs);
4016 vec_oprnds_i.safe_grow_cleared (nargs);
4017 for (j = 0; j < ncopies; ++j)
4018 {
4019 /* Build argument list for the vectorized call. */
4020 if (j == 0)
4021 vargs.create (nargs);
4022 else
4023 vargs.truncate (0);
4024
4025 for (i = 0; i < nargs; i++)
4026 {
4027 unsigned int k, l, m, o;
4028 tree atype;
4029 op = gimple_call_arg (stmt, i);
4030 switch (bestn->simdclone->args[i].arg_type)
4031 {
4032 case SIMD_CLONE_ARG_TYPE_VECTOR:
4033 atype = bestn->simdclone->args[i].vector_type;
4034 o = nunits / simd_clone_subparts (atype);
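/* O clone-argument vectors of type ATYPE are needed for this copy.  If
   ATYPE has fewer lanes than the operand's vectype, each operand vector
   is split into K pieces with BIT_FIELD_REFs; otherwise K operand
   vectors are combined into one ATYPE value with a CONSTRUCTOR (K == 1
   passes the operand through unchanged).  */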
4035 for (m = j * o; m < (j + 1) * o; m++)
4036 {
4037 if (simd_clone_subparts (atype)
4038 < simd_clone_subparts (arginfo[i].vectype))
4039 {
4040 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4041 k = (simd_clone_subparts (arginfo[i].vectype)
4042 / simd_clone_subparts (atype));
4043 gcc_assert ((k & (k - 1)) == 0);
4044 if (m == 0)
4045 {
4046 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4047 ncopies * o / k, op,
4048 &vec_oprnds[i]);
4049 vec_oprnds_i[i] = 0;
4050 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4051 }
4052 else
4053 {
4054 vec_oprnd0 = arginfo[i].op;
4055 if ((m & (k - 1)) == 0)
4056 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4057 }
4058 arginfo[i].op = vec_oprnd0;
4059 vec_oprnd0
4060 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4061 bitsize_int (prec),
4062 bitsize_int ((m & (k - 1)) * prec));
4063 gassign *new_stmt
4064 = gimple_build_assign (make_ssa_name (atype),
4065 vec_oprnd0);
4066 vect_finish_stmt_generation (vinfo, stmt_info,
4067 new_stmt, gsi);
4068 vargs.safe_push (gimple_assign_lhs (new_stmt));
4069 }
4070 else
4071 {
4072 k = (simd_clone_subparts (atype)
4073 / simd_clone_subparts (arginfo[i].vectype));
4074 gcc_assert ((k & (k - 1)) == 0);
4075 vec<constructor_elt, va_gc> *ctor_elts;
4076 if (k != 1)
4077 vec_alloc (ctor_elts, k);
4078 else
4079 ctor_elts = NULL;
4080 for (l = 0; l < k; l++)
4081 {
4082 if (m == 0 && l == 0)
4083 {
4084 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4085 k * o * ncopies,
4086 op,
4087 &vec_oprnds[i]);
4088 vec_oprnds_i[i] = 0;
4089 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4090 }
4091 else
4092 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4093 arginfo[i].op = vec_oprnd0;
4094 if (k == 1)
4095 break;
4096 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4097 vec_oprnd0);
4098 }
4099 if (k == 1)
4100 vargs.safe_push (vec_oprnd0);
4101 else
4102 {
4103 vec_oprnd0 = build_constructor (atype, ctor_elts);
4104 gassign *new_stmt
4105 = gimple_build_assign (make_ssa_name (atype),
4106 vec_oprnd0);
4107 vect_finish_stmt_generation (vinfo, stmt_info,
4108 new_stmt, gsi);
4109 vargs.safe_push (gimple_assign_lhs (new_stmt));
4110 }
4111 }
4112 }
4113 break;
4114 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4115 vargs.safe_push (op);
4116 break;
4117 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4118 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
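/* A linear argument (unless it is linear only within a simd lane, which
   is passed through as is) becomes an induction variable in the vector
   loop: the first copy builds a PHI seeded with the original base and
   stepped by LINEAR_STEP * NUNITS * NCOPIES per loop iteration, and
   later copies within an iteration add J * NUNITS * LINEAR_STEP to the
   PHI result.  */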
4119 if (j == 0)
4120 {
4121 gimple_seq stmts;
4122 arginfo[i].op
4123 = force_gimple_operand (unshare_expr (arginfo[i].op),
4124 &stmts, true, NULL_TREE);
4125 if (stmts != NULL)
4126 {
4127 basic_block new_bb;
4128 edge pe = loop_preheader_edge (loop);
4129 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4130 gcc_assert (!new_bb);
4131 }
4132 if (arginfo[i].simd_lane_linear)
4133 {
4134 vargs.safe_push (arginfo[i].op);
4135 break;
4136 }
4137 tree phi_res = copy_ssa_name (op);
4138 gphi *new_phi = create_phi_node (phi_res, loop->header);
4139 add_phi_arg (new_phi, arginfo[i].op,
4140 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4141 enum tree_code code
4142 = POINTER_TYPE_P (TREE_TYPE (op))
4143 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4144 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4145 ? sizetype : TREE_TYPE (op);
4146 widest_int cst
4147 = wi::mul (bestn->simdclone->args[i].linear_step,
4148 ncopies * nunits);
4149 tree tcst = wide_int_to_tree (type, cst);
4150 tree phi_arg = copy_ssa_name (op);
4151 gassign *new_stmt
4152 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4153 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4154 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4155 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4156 UNKNOWN_LOCATION);
4157 arginfo[i].op = phi_res;
4158 vargs.safe_push (phi_res);
4159 }
4160 else
4161 {
4162 enum tree_code code
4163 = POINTER_TYPE_P (TREE_TYPE (op))
4164 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4165 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4166 ? sizetype : TREE_TYPE (op);
4167 widest_int cst
4168 = wi::mul (bestn->simdclone->args[i].linear_step,
4169 j * nunits);
4170 tree tcst = wide_int_to_tree (type, cst);
4171 new_temp = make_ssa_name (TREE_TYPE (op));
4172 gassign *new_stmt
4173 = gimple_build_assign (new_temp, code,
4174 arginfo[i].op, tcst);
4175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4176 vargs.safe_push (new_temp);
4177 }
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4181 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4182 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4185 default:
4186 gcc_unreachable ();
4187 }
4188 }
4189
4190 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4191 if (vec_dest)
4192 {
4193 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4194 if (ratype)
4195 new_temp = create_tmp_var (ratype);
4196 else if (simd_clone_subparts (vectype)
4197 == simd_clone_subparts (rtype))
4198 new_temp = make_ssa_name (vec_dest, new_call);
4199 else
4200 new_temp = make_ssa_name (rtype, new_call);
4201 gimple_call_set_lhs (new_call, new_temp);
4202 }
4203 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4204 gimple *new_stmt = new_call;
4205
4206 if (vec_dest)
4207 {
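/* Massage the clone's return value back into vectors of VECTYPE:
   if the clone returns more lanes than VECTYPE, split the result into
   K pieces (BIT_FIELD_REFs, or MEM_REFs into the returned array when
   RATYPE); if it returns fewer lanes, collect successive results into
   a CONSTRUCTOR; if it returns an array of exactly the right width,
   simply load it back through a MEM_REF.  */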
4208 if (simd_clone_subparts (vectype) < nunits)
4209 {
4210 unsigned int k, l;
4211 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4212 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4213 k = nunits / simd_clone_subparts (vectype);
4214 gcc_assert ((k & (k - 1)) == 0);
4215 for (l = 0; l < k; l++)
4216 {
4217 tree t;
4218 if (ratype)
4219 {
4220 t = build_fold_addr_expr (new_temp);
4221 t = build2 (MEM_REF, vectype, t,
4222 build_int_cst (TREE_TYPE (t), l * bytes));
4223 }
4224 else
4225 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4226 bitsize_int (prec), bitsize_int (l * prec));
4227 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4229
4230 if (j == 0 && l == 0)
4231 *vec_stmt = new_stmt;
4232 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4233 }
4234
4235 if (ratype)
4236 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4237 continue;
4238 }
4239 else if (simd_clone_subparts (vectype) > nunits)
4240 {
4241 unsigned int k = (simd_clone_subparts (vectype)
4242 / simd_clone_subparts (rtype));
4243 gcc_assert ((k & (k - 1)) == 0);
4244 if ((j & (k - 1)) == 0)
4245 vec_alloc (ret_ctor_elts, k);
4246 if (ratype)
4247 {
4248 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4249 for (m = 0; m < o; m++)
4250 {
4251 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4252 size_int (m), NULL_TREE, NULL_TREE);
4253 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4254 tem);
4255 vect_finish_stmt_generation (vinfo, stmt_info,
4256 new_stmt, gsi);
4257 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4258 gimple_assign_lhs (new_stmt));
4259 }
4260 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4261 }
4262 else
4263 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4264 if ((j & (k - 1)) != k - 1)
4265 continue;
4266 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4267 new_stmt
4268 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4269 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4270
4271 if ((unsigned) j == k - 1)
4272 *vec_stmt = new_stmt;
4273 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4274 continue;
4275 }
4276 else if (ratype)
4277 {
4278 tree t = build_fold_addr_expr (new_temp);
4279 t = build2 (MEM_REF, vectype, t,
4280 build_int_cst (TREE_TYPE (t), 0));
4281 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4282 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4283 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4284 }
4285 }
4286
4287 if (j == 0)
4288 *vec_stmt = new_stmt;
4289 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4290 }
4291
4292 for (i = 0; i < nargs; ++i)
4293 {
4294 vec<tree> oprndsi = vec_oprnds[i];
4295 oprndsi.release ();
4296 }
4297 vargs.release ();
4298
4299 /* The call in STMT might prevent it from being removed in DCE.
4300 However, we cannot remove it here, due to the way the SSA name
4301 it defines is mapped to the new definition. So just replace the
4302 rhs of the statement with something harmless. */
4303
4304 if (slp_node)
4305 return true;
4306
4307 gimple *new_stmt;
4308 if (scalar_dest)
4309 {
4310 type = TREE_TYPE (scalar_dest);
4311 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4312 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4313 }
4314 else
4315 new_stmt = gimple_build_nop ();
4316 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4317 unlink_stmt_vdef (stmt);
4318
4319 return true;
4320 }
4321
4322
4323 /* Function vect_gen_widened_results_half
4324
4325 Create a vector stmt whose code is CODE, whose result variable is
4326 based on VEC_DEST, and whose operands are VEC_OPRND0 and VEC_OPRND1
4327 (VEC_OPRND1 is ignored unless OP_TYPE indicates a binary operation).
4328 The new vector stmt is to be inserted at GSI. OP_TYPE must match the
4329 number of operands of CODE.
4330 STMT_INFO is the original scalar stmt that we are vectorizing. */
4331
4332 static gimple *
4333 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4334 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4335 tree vec_dest, gimple_stmt_iterator *gsi,
4336 stmt_vec_info stmt_info)
4337 {
4338 gimple *new_stmt;
4339 tree new_temp;
4340
4341 /* Generate half of the widened result: */
4342 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4343 if (op_type != binary_op)
4344 vec_oprnd1 = NULL;
4345 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4346 new_temp = make_ssa_name (vec_dest, new_stmt);
4347 gimple_assign_set_lhs (new_stmt, new_temp);
4348 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4349
4350 return new_stmt;
4351 }
4352
4353
4354 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4355 For multi-step conversions store the resulting vectors and call the function
4356 recursively. */
4357
4358 static void
4359 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4360 int multi_step_cvt,
4361 stmt_vec_info stmt_info,
4362 vec<tree> vec_dsts,
4363 gimple_stmt_iterator *gsi,
4364 slp_tree slp_node, enum tree_code code)
4365 {
4366 unsigned int i;
4367 tree vop0, vop1, new_tmp, vec_dest;
4368
4369 vec_dest = vec_dsts.pop ();
4370
4371 for (i = 0; i < vec_oprnds->length (); i += 2)
4372 {
4373 /* Create demotion operation. */
4374 vop0 = (*vec_oprnds)[i];
4375 vop1 = (*vec_oprnds)[i + 1];
4376 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4377 new_tmp = make_ssa_name (vec_dest, new_stmt);
4378 gimple_assign_set_lhs (new_stmt, new_tmp);
4379 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4380
4381 if (multi_step_cvt)
4382 /* Store the resulting vector for next recursive call. */
4383 (*vec_oprnds)[i/2] = new_tmp;
4384 else
4385 {
4386 /* This is the last step of the conversion sequence. Store the
4387 vectors in SLP_NODE or in vector info of the scalar statement
4388 (or in STMT_VINFO_RELATED_STMT chain). */
4389 if (slp_node)
4390 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4391 else
4392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4393 }
4394 }
4395
4396 /* For multi-step demotion operations we first generate demotion operations
4397 from the source type to the intermediate types, and then combine the
4398 results (stored in VEC_OPRNDS) with a further demotion operation to the
4399 destination type. */
4400 if (multi_step_cvt)
4401 {
4402 /* At each level of recursion we have half of the operands we had at the
4403 previous level. */
4404 vec_oprnds->truncate ((i+1)/2);
4405 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4406 multi_step_cvt - 1,
4407 stmt_info, vec_dsts, gsi,
4408 slp_node, VEC_PACK_TRUNC_EXPR);
4409 }
4410
4411 vec_dsts.quick_push (vec_dest);
4412 }
4413
4414
4415 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4416 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4417 STMT_INFO. For multi-step conversions store the resulting vectors and
4418 call the function recursively. */
4419
4420 static void
4421 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4422 vec<tree> *vec_oprnds0,
4423 vec<tree> *vec_oprnds1,
4424 stmt_vec_info stmt_info, tree vec_dest,
4425 gimple_stmt_iterator *gsi,
4426 enum tree_code code1,
4427 enum tree_code code2, int op_type)
4428 {
4429 int i;
4430 tree vop0, vop1, new_tmp1, new_tmp2;
4431 gimple *new_stmt1, *new_stmt2;
4432 vec<tree> vec_tmp = vNULL;
4433
4434 vec_tmp.create (vec_oprnds0->length () * 2);
4435 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4436 {
4437 if (op_type == binary_op)
4438 vop1 = (*vec_oprnds1)[i];
4439 else
4440 vop1 = NULL_TREE;
4441
4442 /* Generate the two halves of promotion operation. */
4443 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4444 op_type, vec_dest, gsi,
4445 stmt_info);
4446 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4447 op_type, vec_dest, gsi,
4448 stmt_info);
4449 if (is_gimple_call (new_stmt1))
4450 {
4451 new_tmp1 = gimple_call_lhs (new_stmt1);
4452 new_tmp2 = gimple_call_lhs (new_stmt2);
4453 }
4454 else
4455 {
4456 new_tmp1 = gimple_assign_lhs (new_stmt1);
4457 new_tmp2 = gimple_assign_lhs (new_stmt2);
4458 }
4459
4460 /* Store the results for the next step. */
4461 vec_tmp.quick_push (new_tmp1);
4462 vec_tmp.quick_push (new_tmp2);
4463 }
4464
4465 vec_oprnds0->release ();
4466 *vec_oprnds0 = vec_tmp;
4467 }
4468
4469
4470 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4471 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4472 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4473 Return true if STMT_INFO is vectorizable in this way. */
4474
4475 static bool
4476 vectorizable_conversion (vec_info *vinfo,
4477 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4478 gimple **vec_stmt, slp_tree slp_node,
4479 stmt_vector_for_cost *cost_vec)
4480 {
4481 tree vec_dest;
4482 tree scalar_dest;
4483 tree op0, op1 = NULL_TREE;
4484 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4485 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4486 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4487 tree new_temp;
4488 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4489 int ndts = 2;
4490 poly_uint64 nunits_in;
4491 poly_uint64 nunits_out;
4492 tree vectype_out, vectype_in;
4493 int ncopies, i;
4494 tree lhs_type, rhs_type;
4495 enum { NARROW, NONE, WIDEN } modifier;
4496 vec<tree> vec_oprnds0 = vNULL;
4497 vec<tree> vec_oprnds1 = vNULL;
4498 tree vop0;
4499 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4500 int multi_step_cvt = 0;
4501 vec<tree> interm_types = vNULL;
4502 tree intermediate_type, cvt_type = NULL_TREE;
4503 int op_type;
4504 unsigned short fltsz;
4505
4506 /* Is STMT a vectorizable conversion? */
4507
4508 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4509 return false;
4510
4511 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4512 && ! vec_stmt)
4513 return false;
4514
4515 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4516 if (!stmt)
4517 return false;
4518
4519 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4520 return false;
4521
4522 code = gimple_assign_rhs_code (stmt);
4523 if (!CONVERT_EXPR_CODE_P (code)
4524 && code != FIX_TRUNC_EXPR
4525 && code != FLOAT_EXPR
4526 && code != WIDEN_MULT_EXPR
4527 && code != WIDEN_LSHIFT_EXPR)
4528 return false;
4529
4530 op_type = TREE_CODE_LENGTH (code);
4531
4532 /* Check types of lhs and rhs. */
4533 scalar_dest = gimple_assign_lhs (stmt);
4534 lhs_type = TREE_TYPE (scalar_dest);
4535 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4536
4537 /* Check the operands of the operation. */
4538 slp_tree slp_op0, slp_op1 = NULL;
4539 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4540 0, &op0, &slp_op0, &dt[0], &vectype_in))
4541 {
4542 if (dump_enabled_p ())
4543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4544 "use not simple.\n");
4545 return false;
4546 }
4547
4548 rhs_type = TREE_TYPE (op0);
4549 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4550 && !((INTEGRAL_TYPE_P (lhs_type)
4551 && INTEGRAL_TYPE_P (rhs_type))
4552 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4553 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4554 return false;
4555
4556 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4557 && ((INTEGRAL_TYPE_P (lhs_type)
4558 && !type_has_mode_precision_p (lhs_type))
4559 || (INTEGRAL_TYPE_P (rhs_type)
4560 && !type_has_mode_precision_p (rhs_type))))
4561 {
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4564 "type conversion to/from bit-precision unsupported."
4565 "\n");
4566 return false;
4567 }
4568
4569 if (op_type == binary_op)
4570 {
4571 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4572
4573 op1 = gimple_assign_rhs2 (stmt);
4574 tree vectype1_in;
4575 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4576 &op1, &slp_op1, &dt[1], &vectype1_in))
4577 {
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "use not simple.\n");
4581 return false;
4582 }
4583 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4584 OP1. */
4585 if (!vectype_in)
4586 vectype_in = vectype1_in;
4587 }
4588
4589 /* If op0 is an external or constant def, infer the vector type
4590 from the scalar type. */
4591 if (!vectype_in)
4592 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4593 if (vec_stmt)
4594 gcc_assert (vectype_in);
4595 if (!vectype_in)
4596 {
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "no vectype for scalar type %T\n", rhs_type);
4600
4601 return false;
4602 }
4603
4604 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4605 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4606 {
4607 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4609 "can't convert between boolean and non "
4610 "boolean vectors %T\n", rhs_type);
4611
4612 return false;
4613 }
4614
4615 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4616 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4617 if (known_eq (nunits_out, nunits_in))
4618 modifier = NONE;
4619 else if (multiple_p (nunits_out, nunits_in))
4620 modifier = NARROW;
4621 else
4622 {
4623 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4624 modifier = WIDEN;
4625 }
4626
4627 /* Multiple types in SLP are handled by creating the appropriate number of
4628 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4629 case of SLP. */
4630 if (slp_node)
4631 ncopies = 1;
4632 else if (modifier == NARROW)
4633 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4634 else
4635 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4636
4637 /* Sanity check: make sure that at least one copy of the vectorized stmt
4638 needs to be generated. */
4639 gcc_assert (ncopies >= 1);
4640
4641 bool found_mode = false;
4642 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4643 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4644 opt_scalar_mode rhs_mode_iter;
4645
4646 /* Supportable by target? */
4647 switch (modifier)
4648 {
4649 case NONE:
4650 if (code != FIX_TRUNC_EXPR
4651 && code != FLOAT_EXPR
4652 && !CONVERT_EXPR_CODE_P (code))
4653 return false;
4654 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4655 break;
4656 /* FALLTHRU */
4657 unsupported:
4658 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4660 "conversion not supported by target.\n");
4661 return false;
4662
4663 case WIDEN:
4664 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4665 vectype_in, &code1, &code2,
4666 &multi_step_cvt, &interm_types))
4667 {
4668 /* A binary widening operation can only be supported directly by the
4669 architecture. */
4670 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4671 break;
4672 }
4673
4674 if (code != FLOAT_EXPR
4675 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4676 goto unsupported;
4677
4678 fltsz = GET_MODE_SIZE (lhs_mode);
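/* The widening FLOAT_EXPR is not supported directly, so look for a
   two-stage scheme: widen the integer input to a wider integer mode
   CVT_TYPE first and then convert that to the float result.  For
   example (types chosen for illustration), a short -> double
   conversion may become a short -> int widening followed by an
   int -> double conversion.  */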
4679 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4680 {
4681 rhs_mode = rhs_mode_iter.require ();
4682 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4683 break;
4684
4685 cvt_type
4686 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4687 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4688 if (cvt_type == NULL_TREE)
4689 goto unsupported;
4690
4691 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4692 {
4693 if (!supportable_convert_operation (code, vectype_out,
4694 cvt_type, &codecvt1))
4695 goto unsupported;
4696 }
4697 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4698 vectype_out, cvt_type,
4699 &codecvt1, &codecvt2,
4700 &multi_step_cvt,
4701 &interm_types))
4702 continue;
4703 else
4704 gcc_assert (multi_step_cvt == 0);
4705
4706 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4707 cvt_type,
4708 vectype_in, &code1, &code2,
4709 &multi_step_cvt, &interm_types))
4710 {
4711 found_mode = true;
4712 break;
4713 }
4714 }
4715
4716 if (!found_mode)
4717 goto unsupported;
4718
4719 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4720 codecvt2 = ERROR_MARK;
4721 else
4722 {
4723 multi_step_cvt++;
4724 interm_types.safe_push (cvt_type);
4725 cvt_type = NULL_TREE;
4726 }
4727 break;
4728
4729 case NARROW:
4730 gcc_assert (op_type == unary_op);
4731 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4732 &code1, &multi_step_cvt,
4733 &interm_types))
4734 break;
4735
4736 if (code != FIX_TRUNC_EXPR
4737 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4738 goto unsupported;
4739
4740 cvt_type
4741 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4742 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4743 if (cvt_type == NULL_TREE)
4744 goto unsupported;
4745 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4746 &codecvt1))
4747 goto unsupported;
4748 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4749 &code1, &multi_step_cvt,
4750 &interm_types))
4751 break;
4752 goto unsupported;
4753
4754 default:
4755 gcc_unreachable ();
4756 }
4757
4758 if (!vec_stmt) /* transformation not required. */
4759 {
4760 if (slp_node
4761 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4762 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4763 {
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4766 "incompatible vector types for invariants\n");
4767 return false;
4768 }
4769 DUMP_VECT_SCOPE ("vectorizable_conversion");
4770 if (modifier == NONE)
4771 {
4772 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4773 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4774 cost_vec);
4775 }
4776 else if (modifier == NARROW)
4777 {
4778 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4779 /* The final packing step produces one vector result per copy. */
4780 unsigned int nvectors
4781 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4782 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4783 multi_step_cvt, cost_vec);
4784 }
4785 else
4786 {
4787 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4788 /* The initial unpacking step produces two vector results
4789 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4790 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4791 unsigned int nvectors
4792 = (slp_node
4793 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4794 : ncopies * 2);
4795 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4796 multi_step_cvt, cost_vec);
4797 }
4798 interm_types.release ();
4799 return true;
4800 }
4801
4802 /* Transform. */
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_NOTE, vect_location,
4805 "transform conversion. ncopies = %d.\n", ncopies);
4806
4807 if (op_type == binary_op)
4808 {
4809 if (CONSTANT_CLASS_P (op0))
4810 op0 = fold_convert (TREE_TYPE (op1), op0);
4811 else if (CONSTANT_CLASS_P (op1))
4812 op1 = fold_convert (TREE_TYPE (op0), op1);
4813 }
4814
4815 /* In case of multi-step conversion, we first generate conversion operations
4816 to the intermediate types, and then from those types to the final one.
4817 We create vector destinations for the intermediate types (INTERM_TYPES) received
4818 from supportable_*_operation, and store them in the correct order
4819 for future use in vect_create_vectorized_*_stmts (). */
4820 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4821 vec_dest = vect_create_destination_var (scalar_dest,
4822 (cvt_type && modifier == WIDEN)
4823 ? cvt_type : vectype_out);
4824 vec_dsts.quick_push (vec_dest);
4825
4826 if (multi_step_cvt)
4827 {
4828 for (i = interm_types.length () - 1;
4829 interm_types.iterate (i, &intermediate_type); i--)
4830 {
4831 vec_dest = vect_create_destination_var (scalar_dest,
4832 intermediate_type);
4833 vec_dsts.quick_push (vec_dest);
4834 }
4835 }
4836
4837 if (cvt_type)
4838 vec_dest = vect_create_destination_var (scalar_dest,
4839 modifier == WIDEN
4840 ? vectype_out : cvt_type);
4841
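/* For the non-SLP case work out how many input vector defs each copy
   consumes: a NARROW conversion packs two inputs per demotion step, so
   with MULTI_STEP_CVT intermediate steps it needs 2 * 2^MULTI_STEP_CVT
   inputs per copy, while WIDEN and NONE consume one.  */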
4842 int ninputs = 1;
4843 if (!slp_node)
4844 {
4845 if (modifier == WIDEN)
4846 ;
4847 else if (modifier == NARROW)
4848 {
4849 if (multi_step_cvt)
4850 ninputs = vect_pow2 (multi_step_cvt);
4851 ninputs *= 2;
4852 }
4853 }
4854
4855 switch (modifier)
4856 {
4857 case NONE:
4858 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4859 op0, &vec_oprnds0);
4860 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4861 {
4862 /* Arguments are ready, create the new vector stmt. */
4863 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4864 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4865 new_temp = make_ssa_name (vec_dest, new_stmt);
4866 gimple_assign_set_lhs (new_stmt, new_temp);
4867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4868
4869 if (slp_node)
4870 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4871 else
4872 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4873 }
4874 break;
4875
4876 case WIDEN:
4877 /* In case the vectorization factor (VF) is bigger than the number
4878 of elements that we can fit in a vectype (nunits), we have to
4879 generate more than one vector stmt, i.e., we need to "unroll"
4880 the vector stmt by a factor VF/nunits. */
4881 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4882 op0, &vec_oprnds0,
4883 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4884 &vec_oprnds1);
4885 if (code == WIDEN_LSHIFT_EXPR)
4886 {
4887 vec_oprnds1.create (ncopies * ninputs);
4888 for (i = 0; i < ncopies * ninputs; ++i)
4889 vec_oprnds1.quick_push (op1);
4890 }
4891 /* Arguments are ready. Create the new vector stmts. */
4892 for (i = multi_step_cvt; i >= 0; i--)
4893 {
4894 tree this_dest = vec_dsts[i];
4895 enum tree_code c1 = code1, c2 = code2;
4896 if (i == 0 && codecvt2 != ERROR_MARK)
4897 {
4898 c1 = codecvt1;
4899 c2 = codecvt2;
4900 }
4901 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4902 &vec_oprnds1, stmt_info,
4903 this_dest, gsi,
4904 c1, c2, op_type);
4905 }
4906
4907 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4908 {
4909 gimple *new_stmt;
4910 if (cvt_type)
4911 {
4912 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4913 new_temp = make_ssa_name (vec_dest);
4914 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4915 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4916 }
4917 else
4918 new_stmt = SSA_NAME_DEF_STMT (vop0);
4919
4920 if (slp_node)
4921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4922 else
4923 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4924 }
4925 break;
4926
4927 case NARROW:
4928 /* In case the vectorization factor (VF) is bigger than the number
4929 of elements that we can fit in a vectype (nunits), we have to
4930 generate more than one vector stmt, i.e., we need to "unroll"
4931 the vector stmt by a factor VF/nunits. */
4932 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4933 op0, &vec_oprnds0);
4934 /* Arguments are ready. Create the new vector stmts. */
4935 if (cvt_type)
4936 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4937 {
4938 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4939 new_temp = make_ssa_name (vec_dest);
4940 gassign *new_stmt
4941 = gimple_build_assign (new_temp, codecvt1, vop0);
4942 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4943 vec_oprnds0[i] = new_temp;
4944 }
4945
4946 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
4947 multi_step_cvt,
4948 stmt_info, vec_dsts, gsi,
4949 slp_node, code1);
4950 break;
4951 }
4952 if (!slp_node)
4953 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
4954
4955 vec_oprnds0.release ();
4956 vec_oprnds1.release ();
4957 interm_types.release ();
4958
4959 return true;
4960 }
4961
4962 /* Return true if we can assume from the scalar form of STMT_INFO that
4963 neither the scalar nor the vector forms will generate code. STMT_INFO
4964 is known not to involve a data reference. */
4965
4966 bool
4967 vect_nop_conversion_p (stmt_vec_info stmt_info)
4968 {
4969 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4970 if (!stmt)
4971 return false;
4972
4973 tree lhs = gimple_assign_lhs (stmt);
4974 tree_code code = gimple_assign_rhs_code (stmt);
4975 tree rhs = gimple_assign_rhs1 (stmt);
4976
4977 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
4978 return true;
4979
4980 if (CONVERT_EXPR_CODE_P (code))
4981 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
4982
4983 return false;
4984 }
4985
4986 /* Function vectorizable_assignment.
4987
4988 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
4989 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
4990 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4991 Return true if STMT_INFO is vectorizable in this way. */
4992
4993 static bool
4994 vectorizable_assignment (vec_info *vinfo,
4995 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4996 gimple **vec_stmt, slp_tree slp_node,
4997 stmt_vector_for_cost *cost_vec)
4998 {
4999 tree vec_dest;
5000 tree scalar_dest;
5001 tree op;
5002 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5003 tree new_temp;
5004 enum vect_def_type dt[1] = {vect_unknown_def_type};
5005 int ndts = 1;
5006 int ncopies;
5007 int i;
5008 vec<tree> vec_oprnds = vNULL;
5009 tree vop;
5010 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5011 enum tree_code code;
5012 tree vectype_in;
5013
5014 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5015 return false;
5016
5017 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5018 && ! vec_stmt)
5019 return false;
5020
5021 /* Is vectorizable assignment? */
5022 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5023 if (!stmt)
5024 return false;
5025
5026 scalar_dest = gimple_assign_lhs (stmt);
5027 if (TREE_CODE (scalar_dest) != SSA_NAME)
5028 return false;
5029
5030 if (STMT_VINFO_DATA_REF (stmt_info))
5031 return false;
5032
5033 code = gimple_assign_rhs_code (stmt);
5034 if (!(gimple_assign_single_p (stmt)
5035 || code == PAREN_EXPR
5036 || CONVERT_EXPR_CODE_P (code)))
5037 return false;
5038
5039 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5040 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5041
5042 /* Multiple types in SLP are handled by creating the appropriate number of
5043 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5044 case of SLP. */
5045 if (slp_node)
5046 ncopies = 1;
5047 else
5048 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5049
5050 gcc_assert (ncopies >= 1);
5051
5052 slp_tree slp_op;
5053 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5054 &dt[0], &vectype_in))
5055 {
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5058 "use not simple.\n");
5059 return false;
5060 }
5061 if (!vectype_in)
5062 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5063
5064 /* We can handle NOP_EXPR conversions that do not change the number
5065 of elements or the vector size. */
5066 if ((CONVERT_EXPR_CODE_P (code)
5067 || code == VIEW_CONVERT_EXPR)
5068 && (!vectype_in
5069 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5070 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5071 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5072 return false;
5073
5074 /* We do not handle bit-precision changes. */
5075 if ((CONVERT_EXPR_CODE_P (code)
5076 || code == VIEW_CONVERT_EXPR)
5077 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5078 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5079 || !type_has_mode_precision_p (TREE_TYPE (op)))
5080 /* But a conversion that does not change the bit-pattern is ok. */
5081 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5082 > TYPE_PRECISION (TREE_TYPE (op)))
5083 && TYPE_UNSIGNED (TREE_TYPE (op)))
5084 /* Conversion between boolean types of different sizes is
5085 a simple assignment in case their vectypes are the same
5086 boolean vectors. */
5087 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5088 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5089 {
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "type conversion to/from bit-precision "
5093 "unsupported.\n");
5094 return false;
5095 }
5096
5097 if (!vec_stmt) /* transformation not required. */
5098 {
5099 if (slp_node
5100 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5101 {
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5104 "incompatible vector types for invariants\n");
5105 return false;
5106 }
5107 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5108 DUMP_VECT_SCOPE ("vectorizable_assignment");
5109 if (!vect_nop_conversion_p (stmt_info))
5110 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5111 cost_vec);
5112 return true;
5113 }
5114
5115 /* Transform. */
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5118
5119 /* Handle def. */
5120 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5121
5122 /* Handle use. */
5123 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5124
5125 /* Arguments are ready. Create the new vector stmt. */
5126 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5127 {
5128 if (CONVERT_EXPR_CODE_P (code)
5129 || code == VIEW_CONVERT_EXPR)
5130 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5131 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5132 new_temp = make_ssa_name (vec_dest, new_stmt);
5133 gimple_assign_set_lhs (new_stmt, new_temp);
5134 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5135 if (slp_node)
5136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5137 else
5138 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5139 }
5140 if (!slp_node)
5141 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5142
5143 vec_oprnds.release ();
5144 return true;
5145 }
5146
5147
5148 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5149 either as shift by a scalar or by a vector. */
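
/* A minimal usage sketch (assumed caller, not taken from this file): a
   pattern recognizer wanting to know whether left shifts of 32-bit
   integers can be vectorized on the current target could do

     if (vect_supportable_shift (vinfo, LSHIFT_EXPR, integer_type_node))
       ...emit the shift-based form...

   The answer is "yes" if either the vector/scalar or the vector/vector
   shift optab has a handler for the chosen vector mode.  */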
5150
5151 bool
5152 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5153 {
5154
5155 machine_mode vec_mode;
5156 optab optab;
5157 int icode;
5158 tree vectype;
5159
5160 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5161 if (!vectype)
5162 return false;
5163
5164 optab = optab_for_tree_code (code, vectype, optab_scalar);
5165 if (!optab
5166 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5167 {
5168 optab = optab_for_tree_code (code, vectype, optab_vector);
5169 if (!optab
5170 || (optab_handler (optab, TYPE_MODE (vectype))
5171 == CODE_FOR_nothing))
5172 return false;
5173 }
5174
5175 vec_mode = TYPE_MODE (vectype);
5176 icode = (int) optab_handler (optab, vec_mode);
5177 if (icode == CODE_FOR_nothing)
5178 return false;
5179
5180 return true;
5181 }
5182
5183
5184 /* Function vectorizable_shift.
5185
5186 Check if STMT_INFO performs a shift operation that can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5189 Return true if STMT_INFO is vectorizable in this way. */
5190
5191 static bool
5192 vectorizable_shift (vec_info *vinfo,
5193 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5194 gimple **vec_stmt, slp_tree slp_node,
5195 stmt_vector_for_cost *cost_vec)
5196 {
5197 tree vec_dest;
5198 tree scalar_dest;
5199 tree op0, op1 = NULL;
5200 tree vec_oprnd1 = NULL_TREE;
5201 tree vectype;
5202 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5203 enum tree_code code;
5204 machine_mode vec_mode;
5205 tree new_temp;
5206 optab optab;
5207 int icode;
5208 machine_mode optab_op2_mode;
5209 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5210 int ndts = 2;
5211 poly_uint64 nunits_in;
5212 poly_uint64 nunits_out;
5213 tree vectype_out;
5214 tree op1_vectype;
5215 int ncopies;
5216 int i;
5217 vec<tree> vec_oprnds0 = vNULL;
5218 vec<tree> vec_oprnds1 = vNULL;
5219 tree vop0, vop1;
5220 unsigned int k;
5221 bool scalar_shift_arg = true;
5222 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5223 bool incompatible_op1_vectype_p = false;
5224
5225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5226 return false;
5227
5228 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5229 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5230 && ! vec_stmt)
5231 return false;
5232
5233 /* Is STMT a vectorizable binary/unary operation? */
5234 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5235 if (!stmt)
5236 return false;
5237
5238 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5239 return false;
5240
5241 code = gimple_assign_rhs_code (stmt);
5242
5243 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5244 || code == RROTATE_EXPR))
5245 return false;
5246
5247 scalar_dest = gimple_assign_lhs (stmt);
5248 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5249 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5250 {
5251 if (dump_enabled_p ())
5252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5253 "bit-precision shifts not supported.\n");
5254 return false;
5255 }
5256
5257 slp_tree slp_op0;
5258 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5259 0, &op0, &slp_op0, &dt[0], &vectype))
5260 {
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "use not simple.\n");
5264 return false;
5265 }
5266 /* If op0 is an external or constant def, infer the vector type
5267 from the scalar type. */
5268 if (!vectype)
5269 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5270 if (vec_stmt)
5271 gcc_assert (vectype);
5272 if (!vectype)
5273 {
5274 if (dump_enabled_p ())
5275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5276 "no vectype for scalar type\n");
5277 return false;
5278 }
5279
5280 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5281 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5282 if (maybe_ne (nunits_out, nunits_in))
5283 return false;
5284
5285 stmt_vec_info op1_def_stmt_info;
5286 slp_tree slp_op1;
5287 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5288 &dt[1], &op1_vectype, &op1_def_stmt_info))
5289 {
5290 if (dump_enabled_p ())
5291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5292 "use not simple.\n");
5293 return false;
5294 }
5295
5296 /* Multiple types in SLP are handled by creating the appropriate number of
5297 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5298 case of SLP. */
5299 if (slp_node)
5300 ncopies = 1;
5301 else
5302 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5303
5304 gcc_assert (ncopies >= 1);
5305
5306 /* Determine whether the shift amount is a vector or a scalar. If the
5307 shift/rotate amount is a vector, use the vector/vector shift optabs. */
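  /* For example (illustration only): in "a[i] = b[i] << c[i]" the shift
     amount is a vector, so the vector/vector optab is needed, whereas in
     "a[i] = b[i] << n" with loop-invariant N the amount is a scalar and
     the vector/scalar optab can be used.  */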
5308
5309 if ((dt[1] == vect_internal_def
5310 || dt[1] == vect_induction_def
5311 || dt[1] == vect_nested_cycle)
5312 && !slp_node)
5313 scalar_shift_arg = false;
5314 else if (dt[1] == vect_constant_def
5315 || dt[1] == vect_external_def
5316 || dt[1] == vect_internal_def)
5317 {
5318 /* In SLP we need to check whether the shift count is the same in
5319 all the stmts; in loops, if it is a constant or invariant, it
5320 is always a scalar shift. */
5321 if (slp_node)
5322 {
5323 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5324 stmt_vec_info slpstmt_info;
5325
5326 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5327 {
5328 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5329 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5330 scalar_shift_arg = false;
5331 }
5332
5333 /* For internal SLP defs we have to make sure we see scalar stmts
5334 for all vector elements.
5335 ??? For different vectors we could resort to a different
5336 scalar shift operand but code-generation below simply always
5337 takes the first. */
5338 if (dt[1] == vect_internal_def
5339 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5340 stmts.length ()))
5341 scalar_shift_arg = false;
5342 }
5343
5344 /* If the shift amount is computed by a pattern stmt we cannot
5345 use the scalar amount directly thus give up and use a vector
5346 shift. */
5347 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5348 scalar_shift_arg = false;
5349 }
5350 else
5351 {
5352 if (dump_enabled_p ())
5353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5354 "operand mode requires invariant argument.\n");
5355 return false;
5356 }
5357
5358 /* Vector shifted by vector. */
5359 bool was_scalar_shift_arg = scalar_shift_arg;
5360 if (!scalar_shift_arg)
5361 {
5362 optab = optab_for_tree_code (code, vectype, optab_vector);
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_NOTE, vect_location,
5365 "vector/vector shift/rotate found.\n");
5366
5367 if (!op1_vectype)
5368 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5369 slp_op1);
5370 incompatible_op1_vectype_p
5371 = (op1_vectype == NULL_TREE
5372 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5373 TYPE_VECTOR_SUBPARTS (vectype))
5374 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5375 if (incompatible_op1_vectype_p
5376 && (!slp_node
5377 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5378 || slp_op1->refcnt != 1))
5379 {
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "unusable type for last operand in"
5383 " vector/vector shift/rotate.\n");
5384 return false;
5385 }
5386 }
5387 /* See if the machine has a vector shifted by scalar insn and if not
5388 then see if it has a vector shifted by vector insn. */
5389 else
5390 {
5391 optab = optab_for_tree_code (code, vectype, optab_scalar);
5392 if (optab
5393 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5394 {
5395 if (dump_enabled_p ())
5396 dump_printf_loc (MSG_NOTE, vect_location,
5397 "vector/scalar shift/rotate found.\n");
5398 }
5399 else
5400 {
5401 optab = optab_for_tree_code (code, vectype, optab_vector);
5402 if (optab
5403 && (optab_handler (optab, TYPE_MODE (vectype))
5404 != CODE_FOR_nothing))
5405 {
5406 scalar_shift_arg = false;
5407
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_NOTE, vect_location,
5410 "vector/vector shift/rotate found.\n");
5411
5412 if (!op1_vectype)
5413 op1_vectype = get_vectype_for_scalar_type (vinfo,
5414 TREE_TYPE (op1),
5415 slp_node);
5416
5417 /* Unlike the other binary operators, shifts/rotates take an int
5418 rhs rather than one of the same type as the lhs, so make
5419 sure the scalar is of the right type if we are dealing
5420 with vectors of long long/long/short/char. */
5421 incompatible_op1_vectype_p
5422 = (!op1_vectype
5423 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5424 TREE_TYPE (op1)));
5425 }
5426 }
5427 }
5428
5429 /* Supportable by target? */
5430 if (!optab)
5431 {
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5434 "no optab.\n");
5435 return false;
5436 }
5437 vec_mode = TYPE_MODE (vectype);
5438 icode = (int) optab_handler (optab, vec_mode);
5439 if (icode == CODE_FOR_nothing)
5440 {
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443 "op not supported by target.\n");
5444 /* Check only during analysis. */
5445 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5446 || (!vec_stmt
5447 && !vect_worthwhile_without_simd_p (vinfo, code)))
5448 return false;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "proceeding using word mode.\n");
5452 }
5453
5454 /* Worthwhile without SIMD support? Check only during analysis. */
5455 if (!vec_stmt
5456 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5457 && !vect_worthwhile_without_simd_p (vinfo, code))
5458 {
5459 if (dump_enabled_p ())
5460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5461 "not worthwhile without SIMD support.\n");
5462 return false;
5463 }
5464
5465 if (!vec_stmt) /* transformation not required. */
5466 {
5467 if (slp_node
5468 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5469 || (!scalar_shift_arg
5470 && (!incompatible_op1_vectype_p
5471 || dt[1] == vect_constant_def)
5472 && !vect_maybe_update_slp_op_vectype
5473 (slp_op1,
5474 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5475 {
5476 if (dump_enabled_p ())
5477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5478 "incompatible vector types for invariants\n");
5479 return false;
5480 }
5481 /* Now adjust the constant shift amount in place. */
5482 if (slp_node
5483 && incompatible_op1_vectype_p
5484 && dt[1] == vect_constant_def)
5485 {
5486 for (unsigned i = 0;
5487 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5488 {
5489 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5490 = fold_convert (TREE_TYPE (vectype),
5491 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5492 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5493 == INTEGER_CST));
5494 }
5495 }
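      /* E.g. (hypothetical): if the shifted operand is a vector of
	 "long long" and the SLP shift amounts are the int constants
	 {1, 2, 3, 4}, the loop above rewrites them as "long long"
	 constants so that a matching shift-count vector can be built.  */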
5496 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5497 DUMP_VECT_SCOPE ("vectorizable_shift");
5498 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5499 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5500 return true;
5501 }
5502
5503 /* Transform. */
5504
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_NOTE, vect_location,
5507 "transform binary/unary operation.\n");
5508
5509 if (incompatible_op1_vectype_p && !slp_node)
5510 {
5511 op1 = fold_convert (TREE_TYPE (vectype), op1);
5512 if (dt[1] != vect_constant_def)
5513 op1 = vect_init_vector (vinfo, stmt_info, op1,
5514 TREE_TYPE (vectype), NULL);
5515 }
5516
5517 /* Handle def. */
5518 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5519
5520 if (scalar_shift_arg)
5521 {
5522 /* Vector shl and shr insn patterns can be defined with scalar
5523 operand 2 (shift operand). In this case, use constant or loop
5524 invariant op1 directly, without extending it to vector mode
5525 first. */
5526 optab_op2_mode = insn_data[icode].operand[2].mode;
5527 if (!VECTOR_MODE_P (optab_op2_mode))
5528 {
5529 if (dump_enabled_p ())
5530 dump_printf_loc (MSG_NOTE, vect_location,
5531 "operand 1 using scalar mode.\n");
5532 vec_oprnd1 = op1;
5533 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5534 vec_oprnds1.quick_push (vec_oprnd1);
5535 /* Store vec_oprnd1 for every vector stmt to be created.
5536 We check during the analysis that all the shift arguments
5537 are the same.
5538 TODO: Allow different constants for different vector
5539 stmts generated for an SLP instance. */
5540 for (k = 0;
5541 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5542 vec_oprnds1.quick_push (vec_oprnd1);
5543 }
5544 }
5545 else if (slp_node && incompatible_op1_vectype_p)
5546 {
5547 if (was_scalar_shift_arg)
5548 {
5549 /* If the argument was the same in all lanes, create
5550 the correctly typed vector shift amount directly. */
5551 op1 = fold_convert (TREE_TYPE (vectype), op1);
5552 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5553 !loop_vinfo ? gsi : NULL);
5554 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5555 !loop_vinfo ? gsi : NULL);
5556 vec_oprnds1.create (slp_node->vec_stmts_size);
5557 for (k = 0; k < slp_node->vec_stmts_size; k++)
5558 vec_oprnds1.quick_push (vec_oprnd1);
5559 }
5560 else if (dt[1] == vect_constant_def)
5561 /* The constant shift amount has been adjusted in place. */
5562 ;
5563 else
5564 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5565 }
5566
5567 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5568 (a special case for certain kinds of vector shifts); otherwise,
5569 operand 1 should be of a vector type (the usual case). */
5570 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5571 op0, &vec_oprnds0,
5572 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5573
5574 /* Arguments are ready. Create the new vector stmt. */
5575 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5576 {
5577 vop1 = vec_oprnds1[i];
5578 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5579 new_temp = make_ssa_name (vec_dest, new_stmt);
5580 gimple_assign_set_lhs (new_stmt, new_temp);
5581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5582 if (slp_node)
5583 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5584 else
5585 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5586 }
5587
5588 if (!slp_node)
5589 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5590
5591 vec_oprnds0.release ();
5592 vec_oprnds1.release ();
5593
5594 return true;
5595 }
5596
5597
5598 /* Function vectorizable_operation.
5599
5600 Check if STMT_INFO performs a binary, unary or ternary operation that can
5601 be vectorized.
5602 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5604 Return true if STMT_INFO is vectorizable in this way. */
5605
5606 static bool
5607 vectorizable_operation (vec_info *vinfo,
5608 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5609 gimple **vec_stmt, slp_tree slp_node,
5610 stmt_vector_for_cost *cost_vec)
5611 {
5612 tree vec_dest;
5613 tree scalar_dest;
5614 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5615 tree vectype;
5616 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5617 enum tree_code code, orig_code;
5618 machine_mode vec_mode;
5619 tree new_temp;
5620 int op_type;
5621 optab optab;
5622 bool target_support_p;
5623 enum vect_def_type dt[3]
5624 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5625 int ndts = 3;
5626 poly_uint64 nunits_in;
5627 poly_uint64 nunits_out;
5628 tree vectype_out;
5629 int ncopies, vec_num;
5630 int i;
5631 vec<tree> vec_oprnds0 = vNULL;
5632 vec<tree> vec_oprnds1 = vNULL;
5633 vec<tree> vec_oprnds2 = vNULL;
5634 tree vop0, vop1, vop2;
5635 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5636
5637 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5638 return false;
5639
5640 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5641 && ! vec_stmt)
5642 return false;
5643
5644 /* Is STMT a vectorizable binary/unary operation? */
5645 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5646 if (!stmt)
5647 return false;
5648
5649 /* Loads and stores are handled in vectorizable_{load,store}. */
5650 if (STMT_VINFO_DATA_REF (stmt_info))
5651 return false;
5652
5653 orig_code = code = gimple_assign_rhs_code (stmt);
5654
5655 /* Shifts are handled in vectorizable_shift. */
5656 if (code == LSHIFT_EXPR
5657 || code == RSHIFT_EXPR
5658 || code == LROTATE_EXPR
5659 || code == RROTATE_EXPR)
5660 return false;
5661
5662 /* Comparisons are handled in vectorizable_comparison. */
5663 if (TREE_CODE_CLASS (code) == tcc_comparison)
5664 return false;
5665
5666 /* Conditions are handled in vectorizable_condition. */
5667 if (code == COND_EXPR)
5668 return false;
5669
5670 /* For pointer addition and subtraction, we should use the normal
5671 plus and minus for the vector operation. */
5672 if (code == POINTER_PLUS_EXPR)
5673 code = PLUS_EXPR;
5674 if (code == POINTER_DIFF_EXPR)
5675 code = MINUS_EXPR;
5676
5677 /* Support only unary or binary operations. */
5678 op_type = TREE_CODE_LENGTH (code);
5679 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5680 {
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5683 "num. args = %d (not unary/binary/ternary op).\n",
5684 op_type);
5685 return false;
5686 }
5687
5688 scalar_dest = gimple_assign_lhs (stmt);
5689 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5690
5691 /* Most operations cannot handle bit-precision types without extra
5692 truncations. */
5693 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5694 if (!mask_op_p
5695 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5696 /* Exception are bitwise binary operations. */
5697 && code != BIT_IOR_EXPR
5698 && code != BIT_XOR_EXPR
5699 && code != BIT_AND_EXPR)
5700 {
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "bit-precision arithmetic not supported.\n");
5704 return false;
5705 }
5706
5707 slp_tree slp_op0;
5708 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5709 0, &op0, &slp_op0, &dt[0], &vectype))
5710 {
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5713 "use not simple.\n");
5714 return false;
5715 }
5716 /* If op0 is an external or constant def, infer the vector type
5717 from the scalar type. */
5718 if (!vectype)
5719 {
5720 /* For boolean type we cannot determine vectype by
5721 invariant value (don't know whether it is a vector
5722 of booleans or vector of integers). We use output
5723 vectype because operations on boolean don't change
5724 type. */
5725 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5726 {
5727 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5728 {
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5731 "not supported operation on bool value.\n");
5732 return false;
5733 }
5734 vectype = vectype_out;
5735 }
5736 else
5737 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5738 slp_node);
5739 }
5740 if (vec_stmt)
5741 gcc_assert (vectype);
5742 if (!vectype)
5743 {
5744 if (dump_enabled_p ())
5745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5746 "no vectype for scalar type %T\n",
5747 TREE_TYPE (op0));
5748
5749 return false;
5750 }
5751
5752 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5753 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5754 if (maybe_ne (nunits_out, nunits_in))
5755 return false;
5756
5757 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5758 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5759 if (op_type == binary_op || op_type == ternary_op)
5760 {
5761 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5762 1, &op1, &slp_op1, &dt[1], &vectype2))
5763 {
5764 if (dump_enabled_p ())
5765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5766 "use not simple.\n");
5767 return false;
5768 }
5769 }
5770 if (op_type == ternary_op)
5771 {
5772 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5773 2, &op2, &slp_op2, &dt[2], &vectype3))
5774 {
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5777 "use not simple.\n");
5778 return false;
5779 }
5780 }
5781
5782 /* Multiple types in SLP are handled by creating the appropriate number of
5783 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5784 case of SLP. */
5785 if (slp_node)
5786 {
5787 ncopies = 1;
5788 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5789 }
5790 else
5791 {
5792 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5793 vec_num = 1;
5794 }
5795
5796 gcc_assert (ncopies >= 1);
5797
5798 /* Reject attempts to combine mask types with nonmask types, e.g. if
5799 we have an AND between a (nonmask) boolean loaded from memory and
5800 a (mask) boolean result of a comparison.
5801
5802 TODO: We could easily fix these cases up using pattern statements. */
5803 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5804 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5805 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5806 {
5807 if (dump_enabled_p ())
5808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5809 "mixed mask and nonmask vector types\n");
5810 return false;
5811 }
5812
5813 /* Supportable by target? */
5814
5815 vec_mode = TYPE_MODE (vectype);
5816 if (code == MULT_HIGHPART_EXPR)
5817 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5818 else
5819 {
5820 optab = optab_for_tree_code (code, vectype, optab_default);
5821 if (!optab)
5822 {
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5825 "no optab.\n");
5826 return false;
5827 }
5828 target_support_p = (optab_handler (optab, vec_mode)
5829 != CODE_FOR_nothing);
5830 }
5831
5832 if (!target_support_p)
5833 {
5834 if (dump_enabled_p ())
5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836 "op not supported by target.\n");
5837 /* Check only during analysis. */
5838 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5839 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5840 return false;
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE, vect_location,
5843 "proceeding using word mode.\n");
5844 }
5845
5846 /* Worthwhile without SIMD support? Check only during analysis. */
5847 if (!VECTOR_MODE_P (vec_mode)
5848 && !vec_stmt
5849 && !vect_worthwhile_without_simd_p (vinfo, code))
5850 {
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5853 "not worthwhile without SIMD support.\n");
5854 return false;
5855 }
5856
5857 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5858 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5859 internal_fn cond_fn = get_conditional_internal_fn (code);
5860
5861 if (!vec_stmt) /* transformation not required. */
5862 {
5863 /* If this operation is part of a reduction, a fully-masked loop
5864 should only change the active lanes of the reduction chain,
5865 keeping the inactive lanes as-is. */
5866 if (loop_vinfo
5867 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
5868 && reduc_idx >= 0)
5869 {
5870 if (cond_fn == IFN_LAST
5871 || !direct_internal_fn_supported_p (cond_fn, vectype,
5872 OPTIMIZE_FOR_SPEED))
5873 {
5874 if (dump_enabled_p ())
5875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5876 "can't use a fully-masked loop because no"
5877 " conditional operation is available.\n");
5878 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
5879 }
5880 else
5881 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5882 vectype, NULL);
5883 }
5884
5885 /* Put types on constant and invariant SLP children. */
5886 if (slp_node
5887 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5888 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5889 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5890 {
5891 if (dump_enabled_p ())
5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5893 "incompatible vector types for invariants\n");
5894 return false;
5895 }
5896
5897 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5898 DUMP_VECT_SCOPE ("vectorizable_operation");
5899 vect_model_simple_cost (vinfo, stmt_info,
5900 ncopies, dt, ndts, slp_node, cost_vec);
5901 return true;
5902 }
5903
5904 /* Transform. */
5905
5906 if (dump_enabled_p ())
5907 dump_printf_loc (MSG_NOTE, vect_location,
5908 "transform binary/unary operation.\n");
5909
5910 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5911
5912 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5913 vectors with unsigned elements, but the result is signed. So, we
5914 need to compute the MINUS_EXPR into vectype temporary and
5915 VIEW_CONVERT_EXPR it into the final vectype_out result. */
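  /* Illustrative example (assumed LP64 target, not from the sources): for
     "ptrdiff_t d = p - q" with "int *p, *q", the subtraction is carried
     out on vectors with "unsigned long" elements and the result is then
     VIEW_CONVERT_EXPRed to the signed "vector of long" vectype_out.  */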
5916 tree vec_cvt_dest = NULL_TREE;
5917 if (orig_code == POINTER_DIFF_EXPR)
5918 {
5919 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5920 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5921 }
5922 /* Handle def. */
5923 else
5924 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5925
5926 /* In case the vectorization factor (VF) is bigger than the number
5927 of elements that we can fit in a vectype (nunits), we have to generate
5928 more than one vector stmt - i.e - we need to "unroll" the
5929 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5930 from one copy of the vector stmt to the next, in the field
5931 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5932 stages to find the correct vector defs to be used when vectorizing
5933 stmts that use the defs of the current stmt. The example below
5934 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5935 we need to create 4 vectorized stmts):
5936
5937 before vectorization:
5938 RELATED_STMT VEC_STMT
5939 S1: x = memref - -
5940 S2: z = x + 1 - -
5941
5942 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5943 there):
5944 RELATED_STMT VEC_STMT
5945 VS1_0: vx0 = memref0 VS1_1 -
5946 VS1_1: vx1 = memref1 VS1_2 -
5947 VS1_2: vx2 = memref2 VS1_3 -
5948 VS1_3: vx3 = memref3 - -
5949 S1: x = load - VS1_0
5950 S2: z = x + 1 - -
5951
5952 step2: vectorize stmt S2 (done here):
5953 To vectorize stmt S2 we first need to find the relevant vector
5954 def for the first operand 'x'. This is, as usual, obtained from
5955 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5956 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5957 relevant vector def 'vx0'. Having found 'vx0' we can generate
5958 the vector stmt VS2_0, and as usual, record it in the
5959 STMT_VINFO_VEC_STMT of stmt S2.
5960 When creating the second copy (VS2_1), we obtain the relevant vector
5961 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5962 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5963 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5964 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5965 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5966 chain of stmts and pointers:
5967 RELATED_STMT VEC_STMT
5968 VS1_0: vx0 = memref0 VS1_1 -
5969 VS1_1: vx1 = memref1 VS1_2 -
5970 VS1_2: vx2 = memref2 VS1_3 -
5971 VS1_3: vx3 = memref3 - -
5972 S1: x = load - VS1_0
5973 VS2_0: vz0 = vx0 + v1 VS2_1 -
5974 VS2_1: vz1 = vx1 + v1 VS2_2 -
5975 VS2_2: vz2 = vx2 + v1 VS2_3 -
5976 VS2_3: vz3 = vx3 + v1 - -
5977 S2: z = x + 1 - VS2_0 */
5978
5979 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5980 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
5981 /* Arguments are ready. Create the new vector stmt. */
5982 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5983 {
5984 gimple *new_stmt = NULL;
5985 vop1 = ((op_type == binary_op || op_type == ternary_op)
5986 ? vec_oprnds1[i] : NULL_TREE);
5987 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
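      /* Sketch of the masked-reduction case below (assumed example): for a
	 fully-masked loop computing "sum += a[i]" the generated call is
	 roughly

	   vect_sum = .COND_ADD (loop_mask, vect_sum, vect_a, vect_sum);

	 so inactive lanes simply carry the previous reduction value.  */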
5988 if (masked_loop_p && reduc_idx >= 0)
5989 {
5990 /* Perform the operation on active elements only and take
5991 inactive elements from the reduction chain input. */
5992 gcc_assert (!vop2);
5993 vop2 = reduc_idx == 1 ? vop1 : vop0;
5994 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
5995 vectype, i);
5996 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
5997 vop0, vop1, vop2);
5998 new_temp = make_ssa_name (vec_dest, call);
5999 gimple_call_set_lhs (call, new_temp);
6000 gimple_call_set_nothrow (call, true);
6001 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6002 new_stmt = call;
6003 }
6004 else
6005 {
6006 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6007 new_temp = make_ssa_name (vec_dest, new_stmt);
6008 gimple_assign_set_lhs (new_stmt, new_temp);
6009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6010 if (vec_cvt_dest)
6011 {
6012 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6013 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6014 new_temp);
6015 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6016 gimple_assign_set_lhs (new_stmt, new_temp);
6017 vect_finish_stmt_generation (vinfo, stmt_info,
6018 new_stmt, gsi);
6019 }
6020 }
6021 if (slp_node)
6022 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6023 else
6024 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6025 }
6026
6027 if (!slp_node)
6028 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6029
6030 vec_oprnds0.release ();
6031 vec_oprnds1.release ();
6032 vec_oprnds2.release ();
6033
6034 return true;
6035 }
6036
6037 /* A helper function to ensure data reference DR_INFO's base alignment. */
6038
6039 static void
6040 ensure_base_align (dr_vec_info *dr_info)
6041 {
6042 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6043 return;
6044
6045 if (dr_info->base_misaligned)
6046 {
6047 tree base_decl = dr_info->base_decl;
6048
6049 // We should only be able to increase the alignment of a base object if
6050 // we know what its new alignment should be at compile time.
6051 unsigned HOST_WIDE_INT align_base_to =
6052 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6053
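      /* For instance (illustrative numbers): a global "double d[N]" with
	 the default 8-byte alignment, vectorized with a 16-byte target
	 alignment, yields ALIGN_BASE_TO == 128 and has its declared
	 alignment raised accordingly below.  */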
6054 if (decl_in_symtab_p (base_decl))
6055 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6056 else if (DECL_ALIGN (base_decl) < align_base_to)
6057 {
6058 SET_DECL_ALIGN (base_decl, align_base_to);
6059 DECL_USER_ALIGN (base_decl) = 1;
6060 }
6061 dr_info->base_misaligned = false;
6062 }
6063 }
6064
6065
6066 /* Function get_group_alias_ptr_type.
6067
6068 Return the alias type for the group starting at FIRST_STMT_INFO. */
6069
6070 static tree
6071 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6072 {
6073 struct data_reference *first_dr, *next_dr;
6074
6075 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6076 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6077 while (next_stmt_info)
6078 {
6079 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6080 if (get_alias_set (DR_REF (first_dr))
6081 != get_alias_set (DR_REF (next_dr)))
6082 {
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_NOTE, vect_location,
6085 "conflicting alias set types.\n");
6086 return ptr_type_node;
6087 }
6088 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6089 }
6090 return reference_alias_ptr_type (DR_REF (first_dr));
6091 }
6092
6093
6094 /* Function scan_operand_equal_p.
6095
6096 Helper function for check_scan_store. Compare two references
6097 with .GOMP_SIMD_LANE bases. */
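
/* As an (assumed) illustration: the two references

     D.2042[_20]
   and
     MEM[(int *)_29]   where _29 = &D.2042 + _28 and _28 = _20 * 4

   compare equal here, because the MEM_REF base is peeled back to the
   underlying "omp simd array" variable, the multiplication by the
   element size is matched as the step, and widening casts on the
   offset are looked through.  */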
6098
6099 static bool
6100 scan_operand_equal_p (tree ref1, tree ref2)
6101 {
6102 tree ref[2] = { ref1, ref2 };
6103 poly_int64 bitsize[2], bitpos[2];
6104 tree offset[2], base[2];
6105 for (int i = 0; i < 2; ++i)
6106 {
6107 machine_mode mode;
6108 int unsignedp, reversep, volatilep = 0;
6109 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6110 &offset[i], &mode, &unsignedp,
6111 &reversep, &volatilep);
6112 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6113 return false;
6114 if (TREE_CODE (base[i]) == MEM_REF
6115 && offset[i] == NULL_TREE
6116 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6117 {
6118 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6119 if (is_gimple_assign (def_stmt)
6120 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6121 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6122 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6123 {
6124 if (maybe_ne (mem_ref_offset (base[i]), 0))
6125 return false;
6126 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6127 offset[i] = gimple_assign_rhs2 (def_stmt);
6128 }
6129 }
6130 }
6131
6132 if (!operand_equal_p (base[0], base[1], 0))
6133 return false;
6134 if (maybe_ne (bitsize[0], bitsize[1]))
6135 return false;
6136 if (offset[0] != offset[1])
6137 {
6138 if (!offset[0] || !offset[1])
6139 return false;
6140 if (!operand_equal_p (offset[0], offset[1], 0))
6141 {
6142 tree step[2];
6143 for (int i = 0; i < 2; ++i)
6144 {
6145 step[i] = integer_one_node;
6146 if (TREE_CODE (offset[i]) == SSA_NAME)
6147 {
6148 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6149 if (is_gimple_assign (def_stmt)
6150 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6151 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6152 == INTEGER_CST))
6153 {
6154 step[i] = gimple_assign_rhs2 (def_stmt);
6155 offset[i] = gimple_assign_rhs1 (def_stmt);
6156 }
6157 }
6158 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6159 {
6160 step[i] = TREE_OPERAND (offset[i], 1);
6161 offset[i] = TREE_OPERAND (offset[i], 0);
6162 }
6163 tree rhs1 = NULL_TREE;
6164 if (TREE_CODE (offset[i]) == SSA_NAME)
6165 {
6166 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6167 if (gimple_assign_cast_p (def_stmt))
6168 rhs1 = gimple_assign_rhs1 (def_stmt);
6169 }
6170 else if (CONVERT_EXPR_P (offset[i]))
6171 rhs1 = TREE_OPERAND (offset[i], 0);
6172 if (rhs1
6173 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6174 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6175 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6176 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6177 offset[i] = rhs1;
6178 }
6179 if (!operand_equal_p (offset[0], offset[1], 0)
6180 || !operand_equal_p (step[0], step[1], 0))
6181 return false;
6182 }
6183 }
6184 return true;
6185 }
6186
6187
6188 enum scan_store_kind {
6189 /* Normal permutation. */
6190 scan_store_kind_perm,
6191
6192 /* Whole vector left shift permutation with zero init. */
6193 scan_store_kind_lshift_zero,
6194
6195 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6196 scan_store_kind_lshift_cond
6197 };
6198
6199 /* Function scan_store_can_perm_p.
6200
6201 Verify if we can perform the needed permutations or whole vector shifts.
6202 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6203 If USE_WHOLE_VECTOR is nonnull, it is filled with the scan_store_kind
6204 operation to use at each step. */
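
/* Worked example (assumed V8SI, i.e. nunits == 8; not from the sources):
   UNITS_LOG2 is 3 and the loop below tries four permutations:
     i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }	(broadcast of the last element)
   If one of the first three is not supported directly, a whole-vector
   shift (vec_shl_optab) is tried instead, combined with a VEC_COND_EXPR
   when INIT is not a zero constant.  */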
6205
6206 static int
6207 scan_store_can_perm_p (tree vectype, tree init,
6208 vec<enum scan_store_kind> *use_whole_vector = NULL)
6209 {
6210 enum machine_mode vec_mode = TYPE_MODE (vectype);
6211 unsigned HOST_WIDE_INT nunits;
6212 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6213 return -1;
6214 int units_log2 = exact_log2 (nunits);
6215 if (units_log2 <= 0)
6216 return -1;
6217
6218 int i;
6219 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6220 for (i = 0; i <= units_log2; ++i)
6221 {
6222 unsigned HOST_WIDE_INT j, k;
6223 enum scan_store_kind kind = scan_store_kind_perm;
6224 vec_perm_builder sel (nunits, nunits, 1);
6225 sel.quick_grow (nunits);
6226 if (i == units_log2)
6227 {
6228 for (j = 0; j < nunits; ++j)
6229 sel[j] = nunits - 1;
6230 }
6231 else
6232 {
6233 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6234 sel[j] = j;
6235 for (k = 0; j < nunits; ++j, ++k)
6236 sel[j] = nunits + k;
6237 }
6238 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6239 if (!can_vec_perm_const_p (vec_mode, indices))
6240 {
6241 if (i == units_log2)
6242 return -1;
6243
6244 if (whole_vector_shift_kind == scan_store_kind_perm)
6245 {
6246 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6247 return -1;
6248 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6249 /* Whole vector shifts shift in zeros, so if INIT is an all-zero
6250 constant, there is no need to do anything further. */
6251 if ((TREE_CODE (init) != INTEGER_CST
6252 && TREE_CODE (init) != REAL_CST)
6253 || !initializer_zerop (init))
6254 {
6255 tree masktype = truth_type_for (vectype);
6256 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6257 return -1;
6258 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6259 }
6260 }
6261 kind = whole_vector_shift_kind;
6262 }
6263 if (use_whole_vector)
6264 {
6265 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6266 use_whole_vector->safe_grow_cleared (i);
6267 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6268 use_whole_vector->safe_push (kind);
6269 }
6270 }
6271
6272 return units_log2;
6273 }
6274
6275
6276 /* Function check_scan_store.
6277
6278 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6279
6280 static bool
6281 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6282 enum vect_def_type rhs_dt, bool slp, tree mask,
6283 vect_memory_access_type memory_access_type)
6284 {
6285 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6286 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6287 tree ref_type;
6288
6289 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6290 if (slp
6291 || mask
6292 || memory_access_type != VMAT_CONTIGUOUS
6293 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6294 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6295 || loop_vinfo == NULL
6296 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6297 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6298 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6299 || !integer_zerop (DR_INIT (dr_info->dr))
6300 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6301 || !alias_sets_conflict_p (get_alias_set (vectype),
6302 get_alias_set (TREE_TYPE (ref_type))))
6303 {
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "unsupported OpenMP scan store.\n");
6307 return false;
6308 }
6309
6310 /* We need to pattern match code built by OpenMP lowering and simplified
6311 by following optimizations into something we can handle.
6312 #pragma omp simd reduction(inscan,+:r)
6313 for (...)
6314 {
6315 r += something ();
6316 #pragma omp scan inclusive (r)
6317 use (r);
6318 }
6319 shall have body with:
6320 // Initialization for input phase, store the reduction initializer:
6321 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6322 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6323 D.2042[_21] = 0;
6324 // Actual input phase:
6325 ...
6326 r.0_5 = D.2042[_20];
6327 _6 = _4 + r.0_5;
6328 D.2042[_20] = _6;
6329 // Initialization for scan phase:
6330 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6331 _26 = D.2043[_25];
6332 _27 = D.2042[_25];
6333 _28 = _26 + _27;
6334 D.2043[_25] = _28;
6335 D.2042[_25] = _28;
6336 // Actual scan phase:
6337 ...
6338 r.1_8 = D.2042[_20];
6339 ...
6340 The "omp simd array" variable D.2042 holds the privatized copy used
6341 inside of the loop and D.2043 is another one that holds copies of
6342 the current original list item. The separate GOMP_SIMD_LANE ifn
6343 kinds are there in order to allow optimizing the initializer store
6344 and combiner sequence, e.g. if it is originally some C++ish user
6345 defined reduction, but allow the vectorizer to pattern recognize it
6346 and turn it into the appropriate vectorized scan.
6347
6348 For exclusive scan, this is slightly different:
6349 #pragma omp simd reduction(inscan,+:r)
6350 for (...)
6351 {
6352 use (r);
6353 #pragma omp scan exclusive (r)
6354 r += something ();
6355 }
6356 shall have body with:
6357 // Initialization for input phase, store the reduction initializer:
6358 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6359 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6360 D.2042[_21] = 0;
6361 // Actual input phase:
6362 ...
6363 r.0_5 = D.2042[_20];
6364 _6 = _4 + r.0_5;
6365 D.2042[_20] = _6;
6366 // Initialization for scan phase:
6367 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6368 _26 = D.2043[_25];
6369 D.2044[_25] = _26;
6370 _27 = D.2042[_25];
6371 _28 = _26 + _27;
6372 D.2043[_25] = _28;
6373 // Actual scan phase:
6374 ...
6375 r.1_8 = D.2044[_20];
6376 ... */
6377
6378 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6379 {
6380 /* Match the D.2042[_21] = 0; store above. Just require that
6381 it is a constant or external definition store. */
6382 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6383 {
6384 fail_init:
6385 if (dump_enabled_p ())
6386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6387 "unsupported OpenMP scan initializer store.\n");
6388 return false;
6389 }
6390
6391 if (! loop_vinfo->scan_map)
6392 loop_vinfo->scan_map = new hash_map<tree, tree>;
6393 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6394 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6395 if (cached)
6396 goto fail_init;
6397 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6398
6399 /* These stores can be vectorized normally. */
6400 return true;
6401 }
6402
6403 if (rhs_dt != vect_internal_def)
6404 {
6405 fail:
6406 if (dump_enabled_p ())
6407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6408 "unsupported OpenMP scan combiner pattern.\n");
6409 return false;
6410 }
6411
6412 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6413 tree rhs = gimple_assign_rhs1 (stmt);
6414 if (TREE_CODE (rhs) != SSA_NAME)
6415 goto fail;
6416
6417 gimple *other_store_stmt = NULL;
6418 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6419 bool inscan_var_store
6420 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6421
6422 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6423 {
6424 if (!inscan_var_store)
6425 {
6426 use_operand_p use_p;
6427 imm_use_iterator iter;
6428 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6429 {
6430 gimple *use_stmt = USE_STMT (use_p);
6431 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6432 continue;
6433 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6434 || !is_gimple_assign (use_stmt)
6435 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6436 || other_store_stmt
6437 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6438 goto fail;
6439 other_store_stmt = use_stmt;
6440 }
6441 if (other_store_stmt == NULL)
6442 goto fail;
6443 rhs = gimple_assign_lhs (other_store_stmt);
6444 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6445 goto fail;
6446 }
6447 }
6448 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6449 {
6450 use_operand_p use_p;
6451 imm_use_iterator iter;
6452 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6453 {
6454 gimple *use_stmt = USE_STMT (use_p);
6455 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6456 continue;
6457 if (other_store_stmt)
6458 goto fail;
6459 other_store_stmt = use_stmt;
6460 }
6461 }
6462 else
6463 goto fail;
6464
6465 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6466 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6467 || !is_gimple_assign (def_stmt)
6468 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6469 goto fail;
6470
6471 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6472 /* For pointer addition, we should use the normal plus for the vector
6473 operation. */
6474 switch (code)
6475 {
6476 case POINTER_PLUS_EXPR:
6477 code = PLUS_EXPR;
6478 break;
6479 case MULT_HIGHPART_EXPR:
6480 goto fail;
6481 default:
6482 break;
6483 }
6484 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6485 goto fail;
6486
6487 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6488 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6489 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6490 goto fail;
6491
6492 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6493 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6494 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6495 || !gimple_assign_load_p (load1_stmt)
6496 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6497 || !gimple_assign_load_p (load2_stmt))
6498 goto fail;
6499
6500 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6501 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6502 if (load1_stmt_info == NULL
6503 || load2_stmt_info == NULL
6504 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6505 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6506 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6507 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6508 goto fail;
6509
6510 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6511 {
6512 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6513 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6514 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6515 goto fail;
6516 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6517 tree lrhs;
6518 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6519 lrhs = rhs1;
6520 else
6521 lrhs = rhs2;
6522 use_operand_p use_p;
6523 imm_use_iterator iter;
6524 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6525 {
6526 gimple *use_stmt = USE_STMT (use_p);
6527 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6528 continue;
6529 if (other_store_stmt)
6530 goto fail;
6531 other_store_stmt = use_stmt;
6532 }
6533 }
6534
6535 if (other_store_stmt == NULL)
6536 goto fail;
6537 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6538 || !gimple_store_p (other_store_stmt))
6539 goto fail;
6540
6541 stmt_vec_info other_store_stmt_info
6542 = loop_vinfo->lookup_stmt (other_store_stmt);
6543 if (other_store_stmt_info == NULL
6544 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6545 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6546 goto fail;
6547
6548 gimple *stmt1 = stmt;
6549 gimple *stmt2 = other_store_stmt;
6550 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6551 std::swap (stmt1, stmt2);
6552 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6553 gimple_assign_rhs1 (load2_stmt)))
6554 {
6555 std::swap (rhs1, rhs2);
6556 std::swap (load1_stmt, load2_stmt);
6557 std::swap (load1_stmt_info, load2_stmt_info);
6558 }
6559 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6560 gimple_assign_rhs1 (load1_stmt)))
6561 goto fail;
6562
6563 tree var3 = NULL_TREE;
6564 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6565 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6566 gimple_assign_rhs1 (load2_stmt)))
6567 goto fail;
6568 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6569 {
6570 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6571 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6572 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6573 goto fail;
6574 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6575 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6576 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6577 || lookup_attribute ("omp simd inscan exclusive",
6578 DECL_ATTRIBUTES (var3)))
6579 goto fail;
6580 }
6581
6582 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6583 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6584 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6585 goto fail;
6586
6587 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6588 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6589 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6590 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6591 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6592 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6593 goto fail;
6594
6595 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6596 std::swap (var1, var2);
6597
6598 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6599 {
6600 if (!lookup_attribute ("omp simd inscan exclusive",
6601 DECL_ATTRIBUTES (var1)))
6602 goto fail;
6603 var1 = var3;
6604 }
6605
6606 if (loop_vinfo->scan_map == NULL)
6607 goto fail;
6608 tree *init = loop_vinfo->scan_map->get (var1);
6609 if (init == NULL)
6610 goto fail;
6611
6612 /* The IL is as expected, now check if we can actually vectorize it.
6613 Inclusive scan:
6614 _26 = D.2043[_25];
6615 _27 = D.2042[_25];
6616 _28 = _26 + _27;
6617 D.2043[_25] = _28;
6618 D.2042[_25] = _28;
6619 should be vectorized as (where _40 is the vectorized rhs
6620 from the D.2042[_21] = 0; store):
6621 _30 = MEM <vector(8) int> [(int *)&D.2043];
6622 _31 = MEM <vector(8) int> [(int *)&D.2042];
6623 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6624 _33 = _31 + _32;
6625 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6626 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6627 _35 = _33 + _34;
6628 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6629 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6630 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6631 _37 = _35 + _36;
6632 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6633 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6634 _38 = _30 + _37;
6635 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6636 MEM <vector(8) int> [(int *)&D.2043] = _39;
6637 MEM <vector(8) int> [(int *)&D.2042] = _38;
6638 Exclusive scan:
6639 _26 = D.2043[_25];
6640 D.2044[_25] = _26;
6641 _27 = D.2042[_25];
6642 _28 = _26 + _27;
6643 D.2043[_25] = _28;
6644 should be vectorized as (where _40 is the vectorized rhs
6645 from the D.2042[_21] = 0; store):
6646 _30 = MEM <vector(8) int> [(int *)&D.2043];
6647 _31 = MEM <vector(8) int> [(int *)&D.2042];
6648 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6649 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6650 _34 = _32 + _33;
6651 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6652 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6653 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6654 _36 = _34 + _35;
6655 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6656 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6657 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6658 _38 = _36 + _37;
6659 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6660 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6661 _39 = _30 + _38;
6662 _50 = _31 + _39;
6663 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6664 MEM <vector(8) int> [(int *)&D.2044] = _39;
6665 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6666 enum machine_mode vec_mode = TYPE_MODE (vectype);
6667 optab optab = optab_for_tree_code (code, vectype, optab_default);
6668 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6669 goto fail;
6670
6671 int units_log2 = scan_store_can_perm_p (vectype, *init);
6672 if (units_log2 == -1)
6673 goto fail;
6674
6675 return true;
6676 }
6677
6678
6679 /* Function vectorizable_scan_store.
6680
6681 Helper of vectorizable_store; arguments are as for vectorizable_store.
6682 Handle only the transformation; the checking is done in check_scan_store. */
6683
6684 static bool
6685 vectorizable_scan_store (vec_info *vinfo,
6686 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6687 gimple **vec_stmt, int ncopies)
6688 {
6689 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6690 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6691 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6692 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6693
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_NOTE, vect_location,
6696 "transform scan store. ncopies = %d\n", ncopies);
6697
6698 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6699 tree rhs = gimple_assign_rhs1 (stmt);
6700 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6701
6702 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6703 bool inscan_var_store
6704 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6705
6706 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6707 {
6708 use_operand_p use_p;
6709 imm_use_iterator iter;
6710 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6711 {
6712 gimple *use_stmt = USE_STMT (use_p);
6713 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6714 continue;
6715 rhs = gimple_assign_lhs (use_stmt);
6716 break;
6717 }
6718 }
6719
6720 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6721 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6722 if (code == POINTER_PLUS_EXPR)
6723 code = PLUS_EXPR;
6724 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6725 && commutative_tree_code (code));
6726 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6727 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6728 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6729 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6730 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6731 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6732 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6733 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6734 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6735 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6736 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6737
6738 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6739 {
6740 std::swap (rhs1, rhs2);
6741 std::swap (var1, var2);
6742 std::swap (load1_dr_info, load2_dr_info);
6743 }
6744
6745 tree *init = loop_vinfo->scan_map->get (var1);
6746 gcc_assert (init);
6747
6748 unsigned HOST_WIDE_INT nunits;
6749 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6750 gcc_unreachable ();
6751 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6752 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6753 gcc_assert (units_log2 > 0);
6754 auto_vec<tree, 16> perms;
6755 perms.quick_grow (units_log2 + 1);
6756 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6757 for (int i = 0; i <= units_log2; ++i)
6758 {
6759 unsigned HOST_WIDE_INT j, k;
6760 vec_perm_builder sel (nunits, nunits, 1);
6761 sel.quick_grow (nunits);
6762 if (i == units_log2)
6763 for (j = 0; j < nunits; ++j)
6764 sel[j] = nunits - 1;
6765 else
6766 {
6767 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6768 sel[j] = j;
6769 for (k = 0; j < nunits; ++j, ++k)
6770 sel[j] = nunits + k;
6771 }
6772 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6773 if (!use_whole_vector.is_empty ()
6774 && use_whole_vector[i] != scan_store_kind_perm)
6775 {
6776 if (zero_vec == NULL_TREE)
6777 zero_vec = build_zero_cst (vectype);
6778 if (masktype == NULL_TREE
6779 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6780 masktype = truth_type_for (vectype);
6781 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6782 }
6783 else
6784 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6785 }
6786
6787 tree vec_oprnd1 = NULL_TREE;
6788 tree vec_oprnd2 = NULL_TREE;
6789 tree vec_oprnd3 = NULL_TREE;
6790 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6791 tree dataref_offset = build_int_cst (ref_type, 0);
6792 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6793 vectype, VMAT_CONTIGUOUS);
6794 tree ldataref_ptr = NULL_TREE;
6795 tree orig = NULL_TREE;
6796 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6797 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6798 auto_vec<tree> vec_oprnds1;
6799 auto_vec<tree> vec_oprnds2;
6800 auto_vec<tree> vec_oprnds3;
6801 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6802 *init, &vec_oprnds1,
6803 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6804 rhs2, &vec_oprnds3);
6805 for (int j = 0; j < ncopies; j++)
6806 {
6807 vec_oprnd1 = vec_oprnds1[j];
6808 if (ldataref_ptr == NULL)
6809 vec_oprnd2 = vec_oprnds2[j];
6810 vec_oprnd3 = vec_oprnds3[j];
6811 if (j == 0)
6812 orig = vec_oprnd3;
6813 else if (!inscan_var_store)
6814 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6815
6816 if (ldataref_ptr)
6817 {
6818 vec_oprnd2 = make_ssa_name (vectype);
6819 tree data_ref = fold_build2 (MEM_REF, vectype,
6820 unshare_expr (ldataref_ptr),
6821 dataref_offset);
6822 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6823 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6824 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6826 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6827 }
6828
6829 tree v = vec_oprnd2;
6830 for (int i = 0; i < units_log2; ++i)
6831 {
6832 tree new_temp = make_ssa_name (vectype);
6833 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6834 (zero_vec
6835 && (use_whole_vector[i]
6836 != scan_store_kind_perm))
6837 ? zero_vec : vec_oprnd1, v,
6838 perms[i]);
6839 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6840 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6841 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6842
6843 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6844 {
6845 /* The whole-vector shift shifted in zero bits, but if *init
6846 is not initializer_zerop, those low elements need to be replaced
6847 with the corresponding elements from vec_oprnd1. */
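/* E.g. for i == 1 and an 8-lane vector the mask built below is
{ false, false, true, true, true, true, true, true }, so lanes 0 and 1
are taken from vec_oprnd1 while the remaining lanes keep the shifted
value (illustrative example only).  */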
6848 tree_vector_builder vb (masktype, nunits, 1);
6849 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6850 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6851 ? boolean_false_node : boolean_true_node);
6852
6853 tree new_temp2 = make_ssa_name (vectype);
6854 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6855 new_temp, vec_oprnd1);
6856 vect_finish_stmt_generation (vinfo, stmt_info,
6857 g, gsi);
6858 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6859 new_temp = new_temp2;
6860 }
6861
6862 /* For exclusive scan, perform the perms[i] permutation once
6863 more. */
6864 if (i == 0
6865 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6866 && v == vec_oprnd2)
6867 {
6868 v = new_temp;
6869 --i;
6870 continue;
6871 }
6872
6873 tree new_temp2 = make_ssa_name (vectype);
6874 g = gimple_build_assign (new_temp2, code, v, new_temp);
6875 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6876 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6877
6878 v = new_temp2;
6879 }
6880
6881 tree new_temp = make_ssa_name (vectype);
6882 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6883 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6884 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6885
6886 tree last_perm_arg = new_temp;
6887 /* For exclusive scan, new_temp computed above is the exclusive scan
6888 prefix sum. Turn it into an inclusive prefix sum for the broadcast
6889 of the last element into orig. */
6890 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6891 {
6892 last_perm_arg = make_ssa_name (vectype);
6893 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6894 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6895 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6896 }
6897
6898 orig = make_ssa_name (vectype);
6899 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6900 last_perm_arg, perms[units_log2]);
6901 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6902 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6903
6904 if (!inscan_var_store)
6905 {
6906 tree data_ref = fold_build2 (MEM_REF, vectype,
6907 unshare_expr (dataref_ptr),
6908 dataref_offset);
6909 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6910 g = gimple_build_assign (data_ref, new_temp);
6911 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6912 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6913 }
6914 }
6915
6916 if (inscan_var_store)
6917 for (int j = 0; j < ncopies; j++)
6918 {
6919 if (j != 0)
6920 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6921
6922 tree data_ref = fold_build2 (MEM_REF, vectype,
6923 unshare_expr (dataref_ptr),
6924 dataref_offset);
6925 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6926 gimple *g = gimple_build_assign (data_ref, orig);
6927 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6928 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6929 }
6930 return true;
6931 }
6932
6933
6934 /* Function vectorizable_store.
6935
6936 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6937 that can be vectorized.
6938 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6939 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6940 Return true if STMT_INFO is vectorizable in this way. */
6941
6942 static bool
6943 vectorizable_store (vec_info *vinfo,
6944 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6945 gimple **vec_stmt, slp_tree slp_node,
6946 stmt_vector_for_cost *cost_vec)
6947 {
6948 tree data_ref;
6949 tree op;
6950 tree vec_oprnd = NULL_TREE;
6951 tree elem_type;
6952 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6953 class loop *loop = NULL;
6954 machine_mode vec_mode;
6955 tree dummy;
6956 enum dr_alignment_support alignment_support_scheme;
6957 enum vect_def_type rhs_dt = vect_unknown_def_type;
6958 enum vect_def_type mask_dt = vect_unknown_def_type;
6959 tree dataref_ptr = NULL_TREE;
6960 tree dataref_offset = NULL_TREE;
6961 gimple *ptr_incr = NULL;
6962 int ncopies;
6963 int j;
6964 stmt_vec_info first_stmt_info;
6965 bool grouped_store;
6966 unsigned int group_size, i;
6967 vec<tree> oprnds = vNULL;
6968 vec<tree> result_chain = vNULL;
6969 tree offset = NULL_TREE;
6970 vec<tree> vec_oprnds = vNULL;
6971 bool slp = (slp_node != NULL);
6972 unsigned int vec_num;
6973 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6974 tree aggr_type;
6975 gather_scatter_info gs_info;
6976 poly_uint64 vf;
6977 vec_load_store_type vls_type;
6978 tree ref_type;
6979
6980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6981 return false;
6982
6983 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6984 && ! vec_stmt)
6985 return false;
6986
6987 /* Is vectorizable store? */
6988
6989 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6990 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6991 {
6992 tree scalar_dest = gimple_assign_lhs (assign);
6993 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6994 && is_pattern_stmt_p (stmt_info))
6995 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6996 if (TREE_CODE (scalar_dest) != ARRAY_REF
6997 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6998 && TREE_CODE (scalar_dest) != INDIRECT_REF
6999 && TREE_CODE (scalar_dest) != COMPONENT_REF
7000 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7001 && TREE_CODE (scalar_dest) != REALPART_EXPR
7002 && TREE_CODE (scalar_dest) != MEM_REF)
7003 return false;
7004 }
7005 else
7006 {
7007 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7008 if (!call || !gimple_call_internal_p (call))
7009 return false;
7010
7011 internal_fn ifn = gimple_call_internal_fn (call);
7012 if (!internal_store_fn_p (ifn))
7013 return false;
7014
7015 if (slp_node != NULL)
7016 {
7017 if (dump_enabled_p ())
7018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7019 "SLP of masked stores not supported.\n");
7020 return false;
7021 }
7022
7023 int mask_index = internal_fn_mask_index (ifn);
7024 if (mask_index >= 0)
7025 {
7026 mask = gimple_call_arg (call, mask_index);
7027 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7028 &mask_vectype))
7029 return false;
7030 }
7031 }
7032
7033 op = vect_get_store_rhs (stmt_info);
7034
7035 /* Cannot have hybrid store SLP -- that would mean storing to the
7036 same location twice. */
7037 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7038
7039 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7040 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7041
7042 if (loop_vinfo)
7043 {
7044 loop = LOOP_VINFO_LOOP (loop_vinfo);
7045 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7046 }
7047 else
7048 vf = 1;
7049
7050 /* Multiple types in SLP are handled by creating the appropriate number of
7051 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7052 case of SLP. */
7053 if (slp)
7054 ncopies = 1;
7055 else
7056 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7057
7058 gcc_assert (ncopies >= 1);
7059
7060 /* FORNOW. This restriction should be relaxed. */
7061 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7062 {
7063 if (dump_enabled_p ())
7064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7065 "multiple types in nested loop.\n");
7066 return false;
7067 }
7068
7069 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7070 op, &rhs_dt, &rhs_vectype, &vls_type))
7071 return false;
7072
7073 elem_type = TREE_TYPE (vectype);
7074 vec_mode = TYPE_MODE (vectype);
7075
7076 if (!STMT_VINFO_DATA_REF (stmt_info))
7077 return false;
7078
7079 vect_memory_access_type memory_access_type;
7080 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7081 ncopies, &memory_access_type, &gs_info))
7082 return false;
7083
7084 if (mask)
7085 {
7086 if (memory_access_type == VMAT_CONTIGUOUS)
7087 {
7088 if (!VECTOR_MODE_P (vec_mode)
7089 || !can_vec_mask_load_store_p (vec_mode,
7090 TYPE_MODE (mask_vectype), false))
7091 return false;
7092 }
7093 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7094 && (memory_access_type != VMAT_GATHER_SCATTER
7095 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7096 {
7097 if (dump_enabled_p ())
7098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7099 "unsupported access type for masked store.\n");
7100 return false;
7101 }
7102 }
7103 else
7104 {
7105 /* FORNOW. In some cases we can vectorize even if the data type is not
7106 supported (e.g. array initialization with 0). */
7107 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7108 return false;
7109 }
7110
7111 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7112 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7113 && memory_access_type != VMAT_GATHER_SCATTER
7114 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7115 if (grouped_store)
7116 {
7117 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7118 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7119 group_size = DR_GROUP_SIZE (first_stmt_info);
7120 }
7121 else
7122 {
7123 first_stmt_info = stmt_info;
7124 first_dr_info = dr_info;
7125 group_size = vec_num = 1;
7126 }
7127
7128 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7129 {
7130 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7131 memory_access_type))
7132 return false;
7133 }
7134
7135 if (!vec_stmt) /* transformation not required. */
7136 {
7137 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7138
7139 if (loop_vinfo
7140 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7141 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7142 memory_access_type, &gs_info, mask);
7143
7144 if (slp_node
7145 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7146 vectype))
7147 {
7148 if (dump_enabled_p ())
7149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7150 "incompatible vector types for invariants\n");
7151 return false;
7152 }
7153
7154 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7155 vect_model_store_cost (vinfo, stmt_info, ncopies,
7156 memory_access_type, vls_type, slp_node, cost_vec);
7157 return true;
7158 }
7159 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7160
7161 /* Transform. */
7162
7163 ensure_base_align (dr_info);
7164
7165 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7166 {
7167 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7168 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7169 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7170 tree ptr, var, scale, vec_mask;
7171 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7172 tree mask_halfvectype = mask_vectype;
7173 edge pe = loop_preheader_edge (loop);
7174 gimple_seq seq;
7175 basic_block new_bb;
7176 enum { NARROW, NONE, WIDEN } modifier;
7177 poly_uint64 scatter_off_nunits
7178 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7179
7180 if (known_eq (nunits, scatter_off_nunits))
7181 modifier = NONE;
7182 else if (known_eq (nunits * 2, scatter_off_nunits))
7183 {
7184 modifier = WIDEN;
7185
7186 /* Currently gathers and scatters are only supported for
7187 fixed-length vectors. */
7188 unsigned int count = scatter_off_nunits.to_constant ();
7189 vec_perm_builder sel (count, count, 1);
7190 for (i = 0; i < (unsigned int) count; ++i)
7191 sel.quick_push (i | (count / 2));
7192
7193 vec_perm_indices indices (sel, 1, count);
7194 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7195 indices);
7196 gcc_assert (perm_mask != NULL_TREE);
7197 }
7198 else if (known_eq (nunits, scatter_off_nunits * 2))
7199 {
7200 modifier = NARROW;
7201
7202 /* Currently gathers and scatters are only supported for
7203 fixed-length vectors. */
7204 unsigned int count = nunits.to_constant ();
7205 vec_perm_builder sel (count, count, 1);
7206 for (i = 0; i < (unsigned int) count; ++i)
7207 sel.quick_push (i | (count / 2));
7208
7209 vec_perm_indices indices (sel, 2, count);
7210 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7211 gcc_assert (perm_mask != NULL_TREE);
7212 ncopies *= 2;
7213
7214 if (mask)
7215 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7216 }
7217 else
7218 gcc_unreachable ();
7219
7220 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7221 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7222 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7223 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7224 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7225 scaletype = TREE_VALUE (arglist);
7226
7227 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7228 && TREE_CODE (rettype) == VOID_TYPE);
7229
7230 ptr = fold_convert (ptrtype, gs_info.base);
7231 if (!is_gimple_min_invariant (ptr))
7232 {
7233 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7234 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7235 gcc_assert (!new_bb);
7236 }
7237
7238 if (mask == NULL_TREE)
7239 {
7240 mask_arg = build_int_cst (masktype, -1);
7241 mask_arg = vect_init_vector (vinfo, stmt_info,
7242 mask_arg, masktype, NULL);
7243 }
7244
7245 scale = build_int_cst (scaletype, gs_info.scale);
7246
7247 auto_vec<tree> vec_oprnds0;
7248 auto_vec<tree> vec_oprnds1;
7249 auto_vec<tree> vec_masks;
7250 if (mask)
7251 {
7252 tree mask_vectype = truth_type_for (vectype);
7253 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7254 modifier == NARROW
7255 ? ncopies / 2 : ncopies,
7256 mask, &vec_masks, mask_vectype);
7257 }
7258 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7259 modifier == WIDEN
7260 ? ncopies / 2 : ncopies,
7261 gs_info.offset, &vec_oprnds0);
7262 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7263 modifier == NARROW
7264 ? ncopies / 2 : ncopies,
7265 op, &vec_oprnds1);
7266 for (j = 0; j < ncopies; ++j)
7267 {
7268 if (modifier == WIDEN)
7269 {
7270 if (j & 1)
7271 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7272 perm_mask, stmt_info, gsi);
7273 else
7274 op = vec_oprnd0 = vec_oprnds0[j / 2];
7275 src = vec_oprnd1 = vec_oprnds1[j];
7276 if (mask)
7277 mask_op = vec_mask = vec_masks[j];
7278 }
7279 else if (modifier == NARROW)
7280 {
7281 if (j & 1)
7282 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7283 perm_mask, stmt_info, gsi);
7284 else
7285 src = vec_oprnd1 = vec_oprnds1[j / 2];
7286 op = vec_oprnd0 = vec_oprnds0[j];
7287 if (mask)
7288 mask_op = vec_mask = vec_masks[j / 2];
7289 }
7290 else
7291 {
7292 op = vec_oprnd0 = vec_oprnds0[j];
7293 src = vec_oprnd1 = vec_oprnds1[j];
7294 if (mask)
7295 mask_op = vec_mask = vec_masks[j];
7296 }
7297
7298 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7299 {
7300 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7301 TYPE_VECTOR_SUBPARTS (srctype)));
7302 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7303 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7304 gassign *new_stmt
7305 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7306 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7307 src = var;
7308 }
7309
7310 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7311 {
7312 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7313 TYPE_VECTOR_SUBPARTS (idxtype)));
7314 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7315 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7316 gassign *new_stmt
7317 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7318 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7319 op = var;
7320 }
7321
7322 if (mask)
7323 {
7324 tree utype;
7325 mask_arg = mask_op;
7326 if (modifier == NARROW)
7327 {
7328 var = vect_get_new_ssa_name (mask_halfvectype,
7329 vect_simple_var);
7330 gassign *new_stmt
7331 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7332 : VEC_UNPACK_LO_EXPR,
7333 mask_op);
7334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7335 mask_arg = var;
7336 }
7337 tree optype = TREE_TYPE (mask_arg);
7338 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7339 utype = masktype;
7340 else
7341 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7342 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7343 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7344 gassign *new_stmt
7345 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7346 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7347 mask_arg = var;
7348 if (!useless_type_conversion_p (masktype, utype))
7349 {
7350 gcc_assert (TYPE_PRECISION (utype)
7351 <= TYPE_PRECISION (masktype));
7352 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7353 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7354 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7355 mask_arg = var;
7356 }
7357 }
7358
7359 gcall *new_stmt
7360 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7361 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7362
7363 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7364 }
7365 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7366 return true;
7367 }
7368 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7369 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7370
7371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7372 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7373
7374 if (grouped_store)
7375 {
7376 /* FORNOW */
7377 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7378
7379 /* We vectorize all the stmts of the interleaving group when we
7380 reach the last stmt in the group. */
7381 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7382 < DR_GROUP_SIZE (first_stmt_info)
7383 && !slp)
7384 {
7385 *vec_stmt = NULL;
7386 return true;
7387 }
7388
7389 if (slp)
7390 {
7391 grouped_store = false;
7392 /* VEC_NUM is the number of vect stmts to be created for this
7393 group. */
7394 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7395 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7396 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7397 == first_stmt_info);
7398 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7399 op = vect_get_store_rhs (first_stmt_info);
7400 }
7401 else
7402 /* VEC_NUM is the number of vect stmts to be created for this
7403 group. */
7404 vec_num = group_size;
7405
7406 ref_type = get_group_alias_ptr_type (first_stmt_info);
7407 }
7408 else
7409 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7410
7411 if (dump_enabled_p ())
7412 dump_printf_loc (MSG_NOTE, vect_location,
7413 "transform store. ncopies = %d\n", ncopies);
7414
7415 if (memory_access_type == VMAT_ELEMENTWISE
7416 || memory_access_type == VMAT_STRIDED_SLP)
7417 {
7418 gimple_stmt_iterator incr_gsi;
7419 bool insert_after;
7420 gimple *incr;
7421 tree offvar;
7422 tree ivstep;
7423 tree running_off;
7424 tree stride_base, stride_step, alias_off;
7425 tree vec_oprnd;
7426 tree dr_offset;
7427 unsigned int g;
7428 /* Checked by get_load_store_type. */
7429 unsigned int const_nunits = nunits.to_constant ();
7430
7431 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7432 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7433
7434 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7435 stride_base
7436 = fold_build_pointer_plus
7437 (DR_BASE_ADDRESS (first_dr_info->dr),
7438 size_binop (PLUS_EXPR,
7439 convert_to_ptrofftype (dr_offset),
7440 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7441 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7442
7443 /* For a store with loop-invariant (but other than power-of-2)
7444 stride (i.e. not a grouped access) like so:
7445
7446 for (i = 0; i < n; i += stride)
7447 array[i] = ...;
7448
7449 we generate a new induction variable and new stores from
7450 the components of the (vectorized) rhs:
7451
7452 for (j = 0; ; j += VF*stride)
7453 vectemp = ...;
7454 tmp1 = vectemp[0];
7455 array[j] = tmp1;
7456 tmp2 = vectemp[1];
7457 array[j + stride] = tmp2;
7458 ...
7459 */
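/* As a concrete, purely illustrative instance of the above, with
stride == 3 and 4-lane vectors (VF == 4) this becomes:

for (j = 0; ; j += 12)
vectemp = ...;
array[j]     = vectemp[0];
array[j + 3] = vectemp[1];
array[j + 6] = vectemp[2];
array[j + 9] = vectemp[3];  */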
7460
7461 unsigned nstores = const_nunits;
7462 unsigned lnel = 1;
7463 tree ltype = elem_type;
7464 tree lvectype = vectype;
7465 if (slp)
7466 {
7467 if (group_size < const_nunits
7468 && const_nunits % group_size == 0)
7469 {
7470 nstores = const_nunits / group_size;
7471 lnel = group_size;
7472 ltype = build_vector_type (elem_type, group_size);
7473 lvectype = vectype;
7474
7475 /* First check whether the vec_extract optab supports extracting the
7476 vector elts directly; if it does not, try an integer-mode fallback. */
7477 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7478 machine_mode vmode;
7479 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7480 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7481 group_size).exists (&vmode)
7482 || (convert_optab_handler (vec_extract_optab,
7483 TYPE_MODE (vectype), vmode)
7484 == CODE_FOR_nothing))
7485 {
7486 /* Try to avoid emitting an extract of vector elements
7487 by performing the extracts using an integer type of the
7488 same size, extracting from a vector of those and then
7489 re-interpreting it as the original vector type if
7490 supported. */
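/* An illustrative sketch of that fallback (assuming a V8SI store with
group_size == 2, i.e. lsize == 64 and lnunits == 4; names invented):

vectemp2 = VIEW_CONVERT_EXPR<vector(4) long long int>(vectemp);
... = BIT_FIELD_REF <vectemp2, 64, 0>;
... = BIT_FIELD_REF <vectemp2, 64, 64>;

i.e. each group of two ints is extracted and stored with a single
64-bit store rather than two element extracts.  */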
7491 unsigned lsize
7492 = group_size * GET_MODE_BITSIZE (elmode);
7493 unsigned int lnunits = const_nunits / group_size;
7494 /* If we can't construct such a vector fall back to
7495 element extracts from the original vector type and
7496 element size stores. */
7497 if (int_mode_for_size (lsize, 0).exists (&elmode)
7498 && VECTOR_MODE_P (TYPE_MODE (vectype))
7499 && related_vector_mode (TYPE_MODE (vectype), elmode,
7500 lnunits).exists (&vmode)
7501 && (convert_optab_handler (vec_extract_optab,
7502 vmode, elmode)
7503 != CODE_FOR_nothing))
7504 {
7505 nstores = lnunits;
7506 lnel = group_size;
7507 ltype = build_nonstandard_integer_type (lsize, 1);
7508 lvectype = build_vector_type (ltype, nstores);
7509 }
7510 /* Else fall back to vector extraction anyway.
7511 Fewer stores are more important than avoiding spilling
7512 of the vector we extract from. Compared to the
7513 construction case in vectorizable_load, no store-forwarding
7514 issue exists here for reasonable archs. */
7515 }
7516 }
7517 else if (group_size >= const_nunits
7518 && group_size % const_nunits == 0)
7519 {
7520 nstores = 1;
7521 lnel = const_nunits;
7522 ltype = vectype;
7523 lvectype = vectype;
7524 }
7525 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7526 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7527 }
7528
7529 ivstep = stride_step;
7530 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7531 build_int_cst (TREE_TYPE (ivstep), vf));
7532
7533 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7534
7535 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7536 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7537 create_iv (stride_base, ivstep, NULL,
7538 loop, &incr_gsi, insert_after,
7539 &offvar, NULL);
7540 incr = gsi_stmt (incr_gsi);
7541
7542 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7543
7544 alias_off = build_int_cst (ref_type, 0);
7545 stmt_vec_info next_stmt_info = first_stmt_info;
7546 for (g = 0; g < group_size; g++)
7547 {
7548 running_off = offvar;
7549 if (g)
7550 {
7551 tree size = TYPE_SIZE_UNIT (ltype);
7552 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7553 size);
7554 tree newoff = copy_ssa_name (running_off, NULL);
7555 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7556 running_off, pos);
7557 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7558 running_off = newoff;
7559 }
7560 if (!slp)
7561 op = vect_get_store_rhs (next_stmt_info);
7562 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7563 op, &vec_oprnds);
7564 unsigned int group_el = 0;
7565 unsigned HOST_WIDE_INT
7566 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7567 for (j = 0; j < ncopies; j++)
7568 {
7569 vec_oprnd = vec_oprnds[j];
7570 /* Pun the vector to extract from if necessary. */
7571 if (lvectype != vectype)
7572 {
7573 tree tem = make_ssa_name (lvectype);
7574 gimple *pun
7575 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7576 lvectype, vec_oprnd));
7577 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7578 vec_oprnd = tem;
7579 }
7580 for (i = 0; i < nstores; i++)
7581 {
7582 tree newref, newoff;
7583 gimple *incr, *assign;
7584 tree size = TYPE_SIZE (ltype);
7585 /* Extract the i'th component. */
7586 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7587 bitsize_int (i), size);
7588 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7589 size, pos);
7590
7591 elem = force_gimple_operand_gsi (gsi, elem, true,
7592 NULL_TREE, true,
7593 GSI_SAME_STMT);
7594
7595 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7596 group_el * elsz);
7597 newref = build2 (MEM_REF, ltype,
7598 running_off, this_off);
7599 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7600
7601 /* And store it to *running_off. */
7602 assign = gimple_build_assign (newref, elem);
7603 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7604
7605 group_el += lnel;
7606 if (! slp
7607 || group_el == group_size)
7608 {
7609 newoff = copy_ssa_name (running_off, NULL);
7610 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7611 running_off, stride_step);
7612 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7613
7614 running_off = newoff;
7615 group_el = 0;
7616 }
7617 if (g == group_size - 1
7618 && !slp)
7619 {
7620 if (j == 0 && i == 0)
7621 *vec_stmt = assign;
7622 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7623 }
7624 }
7625 }
7626 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7627 if (slp)
7628 break;
7629 }
7630
7631 vec_oprnds.release ();
7632 return true;
7633 }
7634
7635 auto_vec<tree> dr_chain (group_size);
7636 oprnds.create (group_size);
7637
7638 /* Gather-scatter accesses perform only component accesses, so alignment
7639 is irrelevant for them. */
7640 if (memory_access_type == VMAT_GATHER_SCATTER)
7641 alignment_support_scheme = dr_unaligned_supported;
7642 else
7643 alignment_support_scheme
7644 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7645
7646 gcc_assert (alignment_support_scheme);
7647 vec_loop_masks *loop_masks
7648 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7649 ? &LOOP_VINFO_MASKS (loop_vinfo)
7650 : NULL);
7651 /* Targets with store-lane instructions must not require explicit
7652 realignment. vect_supportable_dr_alignment always returns either
7653 dr_aligned or dr_unaligned_supported for masked operations. */
7654 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7655 && !mask
7656 && !loop_masks)
7657 || alignment_support_scheme == dr_aligned
7658 || alignment_support_scheme == dr_unaligned_supported);
7659
7660 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7661 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7662 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7663
7664 tree bump;
7665 tree vec_offset = NULL_TREE;
7666 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7667 {
7668 aggr_type = NULL_TREE;
7669 bump = NULL_TREE;
7670 }
7671 else if (memory_access_type == VMAT_GATHER_SCATTER)
7672 {
7673 aggr_type = elem_type;
7674 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7675 &bump, &vec_offset);
7676 }
7677 else
7678 {
7679 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7680 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7681 else
7682 aggr_type = vectype;
7683 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7684 memory_access_type);
7685 }
7686
7687 if (mask)
7688 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7689
7690 /* In case the vectorization factor (VF) is bigger than the number
7691 of elements that we can fit in a vectype (nunits), we have to generate
7692 more than one vector stmt, i.e. we need to "unroll" the
7693 vector stmt by a factor of VF/nunits. */
7694
7695 /* In case of interleaving (non-unit grouped access):
7696
7697 S1: &base + 2 = x2
7698 S2: &base = x0
7699 S3: &base + 1 = x1
7700 S4: &base + 3 = x3
7701
7702 We create vectorized stores starting from base address (the access of the
7703 first stmt in the chain (S2 in the above example), when the last store stmt
7704 of the chain (S4) is reached:
7705
7706 VS1: &base = vx2
7707 VS2: &base + vec_size*1 = vx0
7708 VS3: &base + vec_size*2 = vx1
7709 VS4: &base + vec_size*3 = vx3
7710
7711 Then permutation statements are generated:
7712
7713 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7714 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7715 ...
7716
7717 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7718 (the order of the data-refs in the output of vect_permute_store_chain
7719 corresponds to the order of scalar stmts in the interleaving chain - see
7720 the documentation of vect_permute_store_chain()).
7721
7722 In case of both multiple types and interleaving, above vector stores and
7723 permutation stmts are created for every copy. The result vector stmts are
7724 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7725 STMT_VINFO_RELATED_STMT for the next copies.
7726 */
7727
7728 auto_vec<tree> vec_masks;
7729 tree vec_mask = NULL;
7730 auto_vec<tree> vec_offsets;
7731 auto_vec<vec<tree> > gvec_oprnds;
7732 gvec_oprnds.safe_grow_cleared (group_size);
7733 for (j = 0; j < ncopies; j++)
7734 {
7735 gimple *new_stmt;
7736 if (j == 0)
7737 {
7738 if (slp)
7739 {
7740 /* Get vectorized arguments for SLP_NODE. */
7741 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7742 op, &vec_oprnds);
7743 vec_oprnd = vec_oprnds[0];
7744 }
7745 else
7746 {
7747 /* For interleaved stores we collect vectorized defs for all the
7748 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7749 used as an input to vect_permute_store_chain().
7750
7751 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7752 and OPRNDS are of size 1. */
7753 stmt_vec_info next_stmt_info = first_stmt_info;
7754 for (i = 0; i < group_size; i++)
7755 {
7756 /* Since gaps are not supported for interleaved stores,
7757 DR_GROUP_SIZE is the exact number of stmts in the chain.
7758 Therefore, NEXT_STMT_INFO can't be NULL. If there is
7759 no interleaving, DR_GROUP_SIZE is 1, and only one
7760 iteration of the loop will be executed. */
7761 op = vect_get_store_rhs (next_stmt_info);
7762 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7763 ncopies, op, &gvec_oprnds[i]);
7764 vec_oprnd = gvec_oprnds[i][0];
7765 dr_chain.quick_push (gvec_oprnds[i][0]);
7766 oprnds.quick_push (gvec_oprnds[i][0]);
7767 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7768 }
7769 if (mask)
7770 {
7771 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7772 mask, &vec_masks, mask_vectype);
7773 vec_mask = vec_masks[0];
7774 }
7775 }
7776
7777 /* We should have caught mismatched types earlier. */
7778 gcc_assert (useless_type_conversion_p (vectype,
7779 TREE_TYPE (vec_oprnd)));
7780 bool simd_lane_access_p
7781 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7782 if (simd_lane_access_p
7783 && !loop_masks
7784 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7785 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7786 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7787 && integer_zerop (DR_INIT (first_dr_info->dr))
7788 && alias_sets_conflict_p (get_alias_set (aggr_type),
7789 get_alias_set (TREE_TYPE (ref_type))))
7790 {
7791 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7792 dataref_offset = build_int_cst (ref_type, 0);
7793 }
7794 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7795 {
7796 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7797 &dataref_ptr, &vec_offsets, ncopies);
7798 vec_offset = vec_offsets[0];
7799 }
7800 else
7801 dataref_ptr
7802 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7803 simd_lane_access_p ? loop : NULL,
7804 offset, &dummy, gsi, &ptr_incr,
7805 simd_lane_access_p, NULL_TREE, bump);
7806 }
7807 else
7808 {
7809 /* For interleaved stores we created vectorized defs for all the
7810 defs stored in OPRNDS in the previous iteration (previous copy).
7811 DR_CHAIN is then used as an input to vect_permute_store_chain().
7812 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7813 OPRNDS are of size 1. */
7814 for (i = 0; i < group_size; i++)
7815 {
7816 vec_oprnd = gvec_oprnds[i][j];
7817 dr_chain[i] = gvec_oprnds[i][j];
7818 oprnds[i] = gvec_oprnds[i][j];
7819 }
7820 if (mask)
7821 vec_mask = vec_masks[j];
7822 if (dataref_offset)
7823 dataref_offset
7824 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7825 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7826 vec_offset = vec_offsets[j];
7827 else
7828 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7829 stmt_info, bump);
7830 }
7831
7832 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7833 {
7834 tree vec_array;
7835
7836 /* Get an array into which we can store the individual vectors. */
7837 vec_array = create_vector_array (vectype, vec_num);
7838
7839 /* Invalidate the current contents of VEC_ARRAY. This should
7840 become an RTL clobber too, which prevents the vector registers
7841 from being upward-exposed. */
7842 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7843
7844 /* Store the individual vectors into the array. */
7845 for (i = 0; i < vec_num; i++)
7846 {
7847 vec_oprnd = dr_chain[i];
7848 write_vector_array (vinfo, stmt_info,
7849 gsi, vec_oprnd, vec_array, i);
7850 }
7851
7852 tree final_mask = NULL;
7853 if (loop_masks)
7854 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7855 vectype, j);
7856 if (vec_mask)
7857 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7858 vec_mask, gsi);
7859
7860 gcall *call;
7861 if (final_mask)
7862 {
7863 /* Emit:
7864 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7865 VEC_ARRAY). */
7866 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7867 tree alias_ptr = build_int_cst (ref_type, align);
7868 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7869 dataref_ptr, alias_ptr,
7870 final_mask, vec_array);
7871 }
7872 else
7873 {
7874 /* Emit:
7875 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7876 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7877 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7878 vec_array);
7879 gimple_call_set_lhs (call, data_ref);
7880 }
7881 gimple_call_set_nothrow (call, true);
7882 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7883 new_stmt = call;
7884
7885 /* Record that VEC_ARRAY is now dead. */
7886 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7887 }
7888 else
7889 {
7890 new_stmt = NULL;
7891 if (grouped_store)
7892 {
7893 if (j == 0)
7894 result_chain.create (group_size);
7895 /* Permute. */
7896 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7897 gsi, &result_chain);
7898 }
7899
7900 stmt_vec_info next_stmt_info = first_stmt_info;
7901 for (i = 0; i < vec_num; i++)
7902 {
7903 unsigned misalign;
7904 unsigned HOST_WIDE_INT align;
7905
7906 tree final_mask = NULL_TREE;
7907 if (loop_masks)
7908 final_mask = vect_get_loop_mask (gsi, loop_masks,
7909 vec_num * ncopies,
7910 vectype, vec_num * j + i);
7911 if (vec_mask)
7912 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7913 vec_mask, gsi);
7914
7915 if (memory_access_type == VMAT_GATHER_SCATTER)
7916 {
7917 tree scale = size_int (gs_info.scale);
7918 gcall *call;
7919 if (loop_masks)
7920 call = gimple_build_call_internal
7921 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7922 scale, vec_oprnd, final_mask);
7923 else
7924 call = gimple_build_call_internal
7925 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7926 scale, vec_oprnd);
7927 gimple_call_set_nothrow (call, true);
7928 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7929 new_stmt = call;
7930 break;
7931 }
7932
7933 if (i > 0)
7934 /* Bump the vector pointer. */
7935 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
7936 gsi, stmt_info, bump);
7937
7938 if (slp)
7939 vec_oprnd = vec_oprnds[i];
7940 else if (grouped_store)
7941 /* For grouped stores vectorized defs are interleaved in
7942 vect_permute_store_chain(). */
7943 vec_oprnd = result_chain[i];
7944
7945 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7946 if (aligned_access_p (first_dr_info))
7947 misalign = 0;
7948 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7949 {
7950 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
7951 misalign = 0;
7952 }
7953 else
7954 misalign = DR_MISALIGNMENT (first_dr_info);
7955 if (dataref_offset == NULL_TREE
7956 && TREE_CODE (dataref_ptr) == SSA_NAME)
7957 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7958 misalign);
7959
7960 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7961 {
7962 tree perm_mask = perm_mask_for_reverse (vectype);
7963 tree perm_dest = vect_create_destination_var
7964 (vect_get_store_rhs (stmt_info), vectype);
7965 tree new_temp = make_ssa_name (perm_dest);
7966
7967 /* Generate the permute statement. */
7968 gimple *perm_stmt
7969 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7970 vec_oprnd, perm_mask);
7971 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
7972
7973 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7974 vec_oprnd = new_temp;
7975 }
7976
7977 /* Arguments are ready. Create the new vector stmt. */
7978 if (final_mask)
7979 {
7980 align = least_bit_hwi (misalign | align);
7981 tree ptr = build_int_cst (ref_type, align);
7982 gcall *call
7983 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7984 dataref_ptr, ptr,
7985 final_mask, vec_oprnd);
7986 gimple_call_set_nothrow (call, true);
7987 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7988 new_stmt = call;
7989 }
7990 else
7991 {
7992 data_ref = fold_build2 (MEM_REF, vectype,
7993 dataref_ptr,
7994 dataref_offset
7995 ? dataref_offset
7996 : build_int_cst (ref_type, 0));
7997 if (aligned_access_p (first_dr_info))
7998 ;
7999 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8000 TREE_TYPE (data_ref)
8001 = build_aligned_type (TREE_TYPE (data_ref),
8002 align * BITS_PER_UNIT);
8003 else
8004 TREE_TYPE (data_ref)
8005 = build_aligned_type (TREE_TYPE (data_ref),
8006 TYPE_ALIGN (elem_type));
8007 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8008 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8010 }
8011
8012 if (slp)
8013 continue;
8014
8015 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8016 if (!next_stmt_info)
8017 break;
8018 }
8019 }
8020 if (!slp)
8021 {
8022 if (j == 0)
8023 *vec_stmt = new_stmt;
8024 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8025 }
8026 }
8027
8028 for (i = 0; i < group_size; ++i)
8029 {
8030 vec<tree> oprndsi = gvec_oprnds[i];
8031 oprndsi.release ();
8032 }
8033 oprnds.release ();
8034 result_chain.release ();
8035 vec_oprnds.release ();
8036
8037 return true;
8038 }
8039
8040 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8041 VECTOR_CST mask. No checks are made that the target platform supports the
8042 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8043 vect_gen_perm_mask_checked. */
8044
8045 tree
8046 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8047 {
8048 tree mask_type;
8049
8050 poly_uint64 nunits = sel.length ();
8051 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8052
8053 mask_type = build_vector_type (ssizetype, nunits);
8054 return vec_perm_indices_to_tree (mask_type, sel);
8055 }
8056
8057 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8058 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8059
8060 tree
8061 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8062 {
8063 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8064 return vect_gen_perm_mask_any (vectype, sel);
8065 }
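/* An illustrative usage sketch (hypothetical caller, not taken from this
file), building a lane-reversal mask for an 8-element VECTYPE much as
perm_mask_for_reverse does, testing target support first so that the
checked variant's assert cannot fire:

vec_perm_builder sel (8, 8, 1);
for (unsigned int i = 0; i < 8; ++i)
sel.quick_push (7 - i);
vec_perm_indices indices (sel, 1, 8);
if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
mask = vect_gen_perm_mask_checked (vectype, indices);  */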
8066
8067 /* Given vector variables X and Y that were generated for the scalar
8068 STMT_INFO, generate instructions to permute the vector elements of X and Y
8069 using permutation mask MASK_VEC, insert them at *GSI and return the
8070 permuted vector variable. */
8071
8072 static tree
8073 permute_vec_elements (vec_info *vinfo,
8074 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8075 gimple_stmt_iterator *gsi)
8076 {
8077 tree vectype = TREE_TYPE (x);
8078 tree perm_dest, data_ref;
8079 gimple *perm_stmt;
8080
8081 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8082 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8083 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8084 else
8085 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8086 data_ref = make_ssa_name (perm_dest);
8087
8088 /* Generate the permute statement. */
8089 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8090 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8091
8092 return data_ref;
8093 }
8094
8095 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8096 inserting them on the loop's preheader edge. Returns true if we
8097 were successful in doing so (and thus STMT_INFO can then be moved),
8098 otherwise returns false. */
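/* For instance (purely illustrative, SSA names invented): for an
invariant load inside the loop such as

_1 = a_5(D) + 16;
x_2 = MEM[(int *)_1];      <-- STMT_INFO

the definition of _1 only uses a_5, which is defined outside the loop,
so _1's definition can be moved onto the preheader edge; the caller can
then materialize the load there as well.  */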
8099
8100 static bool
8101 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8102 {
8103 ssa_op_iter i;
8104 tree op;
8105 bool any = false;
8106
8107 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8108 {
8109 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8110 if (!gimple_nop_p (def_stmt)
8111 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8112 {
8113 /* Make sure we don't need to recurse. While we could do
8114 so in simple cases, when there are more complex use webs
8115 we don't have an easy way to preserve stmt order to fulfil
8116 dependencies within them. */
8117 tree op2;
8118 ssa_op_iter i2;
8119 if (gimple_code (def_stmt) == GIMPLE_PHI)
8120 return false;
8121 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8122 {
8123 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8124 if (!gimple_nop_p (def_stmt2)
8125 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8126 return false;
8127 }
8128 any = true;
8129 }
8130 }
8131
8132 if (!any)
8133 return true;
8134
8135 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8136 {
8137 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8138 if (!gimple_nop_p (def_stmt)
8139 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8140 {
8141 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8142 gsi_remove (&gsi, false);
8143 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8144 }
8145 }
8146
8147 return true;
8148 }
8149
8150 /* Function vectorizable_load.
8151
8152 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8153 that can be vectorized.
8154 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8155 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8156 Return true if STMT_INFO is vectorizable in this way. */
8157
8158 static bool
8159 vectorizable_load (vec_info *vinfo,
8160 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8161 gimple **vec_stmt, slp_tree slp_node,
8162 stmt_vector_for_cost *cost_vec)
8163 {
8164 tree scalar_dest;
8165 tree vec_dest = NULL;
8166 tree data_ref = NULL;
8167 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8168 class loop *loop = NULL;
8169 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8170 bool nested_in_vect_loop = false;
8171 tree elem_type;
8172 tree new_temp;
8173 machine_mode mode;
8174 tree dummy;
8175 enum dr_alignment_support alignment_support_scheme;
8176 tree dataref_ptr = NULL_TREE;
8177 tree dataref_offset = NULL_TREE;
8178 gimple *ptr_incr = NULL;
8179 int ncopies;
8180 int i, j;
8181 unsigned int group_size;
8182 poly_uint64 group_gap_adj;
8183 tree msq = NULL_TREE, lsq;
8184 tree offset = NULL_TREE;
8185 tree byte_offset = NULL_TREE;
8186 tree realignment_token = NULL_TREE;
8187 gphi *phi = NULL;
8188 vec<tree> dr_chain = vNULL;
8189 bool grouped_load = false;
8190 stmt_vec_info first_stmt_info;
8191 stmt_vec_info first_stmt_info_for_drptr = NULL;
8192 bool compute_in_loop = false;
8193 class loop *at_loop;
8194 int vec_num;
8195 bool slp = (slp_node != NULL);
8196 bool slp_perm = false;
8197 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8198 poly_uint64 vf;
8199 tree aggr_type;
8200 gather_scatter_info gs_info;
8201 tree ref_type;
8202 enum vect_def_type mask_dt = vect_unknown_def_type;
8203
8204 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8205 return false;
8206
8207 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8208 && ! vec_stmt)
8209 return false;
8210
8211 if (!STMT_VINFO_DATA_REF (stmt_info))
8212 return false;
8213
8214 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8215 for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8216 which can be different when reduction chains were re-ordered.
8217 Now that we have figured out we're a dataref, reset stmt_info back
8218 to SLP_TREE_SCALAR_STMTS[0]. Once we're SLP-only, things should be
8219 refactored in a way to maintain the dr_vec_info pointer for the
8220 relevant access explicitly. */
8221 stmt_vec_info orig_stmt_info = stmt_info;
8222 if (slp_node)
8223 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8224
8225 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8226 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8227 {
8228 scalar_dest = gimple_assign_lhs (assign);
8229 if (TREE_CODE (scalar_dest) != SSA_NAME)
8230 return false;
8231
8232 tree_code code = gimple_assign_rhs_code (assign);
8233 if (code != ARRAY_REF
8234 && code != BIT_FIELD_REF
8235 && code != INDIRECT_REF
8236 && code != COMPONENT_REF
8237 && code != IMAGPART_EXPR
8238 && code != REALPART_EXPR
8239 && code != MEM_REF
8240 && TREE_CODE_CLASS (code) != tcc_declaration)
8241 return false;
8242 }
8243 else
8244 {
8245 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8246 if (!call || !gimple_call_internal_p (call))
8247 return false;
8248
8249 internal_fn ifn = gimple_call_internal_fn (call);
8250 if (!internal_load_fn_p (ifn))
8251 return false;
8252
8253 scalar_dest = gimple_call_lhs (call);
8254 if (!scalar_dest)
8255 return false;
8256
8257 int mask_index = internal_fn_mask_index (ifn);
8258 if (mask_index >= 0)
8259 {
8260 mask = gimple_call_arg (call, mask_index);
8261 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8262 &mask_vectype))
8263 return false;
8264 }
8265 }
8266
8267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8268 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8269
8270 if (loop_vinfo)
8271 {
8272 loop = LOOP_VINFO_LOOP (loop_vinfo);
8273 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8274 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8275 }
8276 else
8277 vf = 1;
8278
8279 /* Multiple types in SLP are handled by creating the appropriate number of
8280 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8281 case of SLP. */
8282 if (slp)
8283 ncopies = 1;
8284 else
8285 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8286
8287 gcc_assert (ncopies >= 1);
8288
8289 /* FORNOW. This restriction should be relaxed. */
8290 if (nested_in_vect_loop && ncopies > 1)
8291 {
8292 if (dump_enabled_p ())
8293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8294 "multiple types in nested loop.\n");
8295 return false;
8296 }
8297
8298 /* Invalidate assumptions made by dependence analysis when vectorization
8299 on the unrolled body effectively re-orders stmts. */
8300 if (ncopies > 1
8301 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8302 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8303 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8304 {
8305 if (dump_enabled_p ())
8306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8307 "cannot perform implicit CSE when unrolling "
8308 "with negative dependence distance\n");
8309 return false;
8310 }
8311
8312 elem_type = TREE_TYPE (vectype);
8313 mode = TYPE_MODE (vectype);
8314
8315 /* FORNOW. In some cases we can vectorize even if the data type is not
8316 supported (e.g. data copies). */
8317 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8318 {
8319 if (dump_enabled_p ())
8320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8321 "Aligned load, but unsupported type.\n");
8322 return false;
8323 }
8324
8325 /* Check if the load is a part of an interleaving chain. */
8326 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8327 {
8328 grouped_load = true;
8329 /* FORNOW */
8330 gcc_assert (!nested_in_vect_loop);
8331 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8332
8333 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8334 group_size = DR_GROUP_SIZE (first_stmt_info);
8335
8336 /* Refuse non-SLP vectorization of SLP-only groups. */
8337 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8338 {
8339 if (dump_enabled_p ())
8340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8341 "cannot vectorize load in non-SLP mode.\n");
8342 return false;
8343 }
8344
8345 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8346 {
8347 slp_perm = true;
8348
8349 if (!loop_vinfo)
8350 {
8351 /* In BB vectorization we may not actually use a loaded vector
8352 accessing elements in excess of DR_GROUP_SIZE. */
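/* E.g. (illustrative) with nunits == 4 and DR_GROUP_SIZE == 6, only
group elements 0..3 may be used by the load permutation, since a
vector covering elements 4..7 would read past the end of the group.  */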
8353 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8354 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8355 unsigned HOST_WIDE_INT nunits;
8356 unsigned j, k, maxk = 0;
8357 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8358 if (k > maxk)
8359 maxk = k;
8360 tree vectype = STMT_VINFO_VECTYPE (group_info);
8361 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8362 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8363 {
8364 if (dump_enabled_p ())
8365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8366 "BB vectorization with gaps at the end of "
8367 "a load is not supported\n");
8368 return false;
8369 }
8370 }
8371
8372 auto_vec<tree> tem;
8373 unsigned n_perms;
8374 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8375 true, &n_perms))
8376 {
8377 if (dump_enabled_p ())
8378 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8379 vect_location,
8380 "unsupported load permutation\n");
8381 return false;
8382 }
8383 }
8384
8385 /* Invalidate assumptions made by dependence analysis when vectorization
8386 on the unrolled body effectively re-orders stmts. */
8387 if (!PURE_SLP_STMT (stmt_info)
8388 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8389 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8390 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8391 {
8392 if (dump_enabled_p ())
8393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8394 "cannot perform implicit CSE when performing "
8395 "group loads with negative dependence distance\n");
8396 return false;
8397 }
8398 }
8399 else
8400 group_size = 1;
8401
8402 vect_memory_access_type memory_access_type;
8403 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8404 ncopies, &memory_access_type, &gs_info))
8405 return false;
8406
8407 if (mask)
8408 {
8409 if (memory_access_type == VMAT_CONTIGUOUS)
8410 {
8411 machine_mode vec_mode = TYPE_MODE (vectype);
8412 if (!VECTOR_MODE_P (vec_mode)
8413 || !can_vec_mask_load_store_p (vec_mode,
8414 TYPE_MODE (mask_vectype), true))
8415 return false;
8416 }
8417 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8418 && memory_access_type != VMAT_GATHER_SCATTER)
8419 {
8420 if (dump_enabled_p ())
8421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8422 "unsupported access type for masked load.\n");
8423 return false;
8424 }
8425 }
8426
8427 if (!vec_stmt) /* transformation not required. */
8428 {
8429 if (!slp)
8430 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8431
8432 if (loop_vinfo
8433 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8434 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8435 memory_access_type, &gs_info, mask);
8436
8437 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8438 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8439 slp_node, cost_vec);
8440 return true;
8441 }
8442
8443 if (!slp)
8444 gcc_assert (memory_access_type
8445 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8446
8447 if (dump_enabled_p ())
8448 dump_printf_loc (MSG_NOTE, vect_location,
8449 "transform load. ncopies = %d\n", ncopies);
8450
8451 /* Transform. */
8452
8453 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8454 ensure_base_align (dr_info);
8455
8456 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8457 {
8458 vect_build_gather_load_calls (vinfo,
8459 stmt_info, gsi, vec_stmt, &gs_info, mask);
8460 return true;
8461 }
8462
8463 if (memory_access_type == VMAT_INVARIANT)
8464 {
8465 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8466 /* If we have versioned for aliasing or the loop doesn't
8467 have any data dependencies that would preclude this,
8468 then we are sure this is a loop invariant load and
8469 thus we can insert it on the preheader edge. */
8470 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8471 && !nested_in_vect_loop
8472 && hoist_defs_of_uses (stmt_info, loop));
8473 if (hoist_p)
8474 {
8475 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8476 if (dump_enabled_p ())
8477 dump_printf_loc (MSG_NOTE, vect_location,
8478 "hoisting out of the vectorized loop: %G", stmt);
8479 scalar_dest = copy_ssa_name (scalar_dest);
8480 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8481 gsi_insert_on_edge_immediate
8482 (loop_preheader_edge (loop),
8483 gimple_build_assign (scalar_dest, rhs));
8484 }
8485 /* These copies are all equivalent, but currently the representation
8486 requires a separate STMT_VINFO_VEC_STMT for each one. */
8487 gimple_stmt_iterator gsi2 = *gsi;
8488 gsi_next (&gsi2);
8489 for (j = 0; j < ncopies; j++)
8490 {
8491 if (hoist_p)
8492 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8493 vectype, NULL);
8494 else
8495 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8496 vectype, &gsi2);
8497 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8498 if (slp)
8499 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8500 else
8501 {
8502 if (j == 0)
8503 *vec_stmt = new_stmt;
8504 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8505 }
8506 }
8507 return true;
8508 }
8509
8510 if (memory_access_type == VMAT_ELEMENTWISE
8511 || memory_access_type == VMAT_STRIDED_SLP)
8512 {
8513 gimple_stmt_iterator incr_gsi;
8514 bool insert_after;
8515 tree offvar;
8516 tree ivstep;
8517 tree running_off;
8518 vec<constructor_elt, va_gc> *v = NULL;
8519 tree stride_base, stride_step, alias_off;
8520 /* Checked by get_load_store_type. */
8521 unsigned int const_nunits = nunits.to_constant ();
8522 unsigned HOST_WIDE_INT cst_offset = 0;
8523 tree dr_offset;
8524
8525 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8526 gcc_assert (!nested_in_vect_loop);
8527
8528 if (grouped_load)
8529 {
8530 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8531 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8532 }
8533 else
8534 {
8535 first_stmt_info = stmt_info;
8536 first_dr_info = dr_info;
8537 }
8538 if (slp && grouped_load)
8539 {
8540 group_size = DR_GROUP_SIZE (first_stmt_info);
8541 ref_type = get_group_alias_ptr_type (first_stmt_info);
8542 }
8543 else
8544 {
8545 if (grouped_load)
8546 cst_offset
8547 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8548 * vect_get_place_in_interleaving_chain (stmt_info,
8549 first_stmt_info));
8550 group_size = 1;
8551 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8552 }
8553
8554 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8555 stride_base
8556 = fold_build_pointer_plus
8557 (DR_BASE_ADDRESS (first_dr_info->dr),
8558 size_binop (PLUS_EXPR,
8559 convert_to_ptrofftype (dr_offset),
8560 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8561 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8562
8563 /* For a load with loop-invariant (but other than power-of-2)
8564 stride (i.e. not a grouped access) like so:
8565
8566 for (i = 0; i < n; i += stride)
8567 ... = array[i];
8568
8569 we generate a new induction variable and new accesses to
8570 form a new vector (or vectors, depending on ncopies):
8571
8572 for (j = 0; ; j += VF*stride)
8573 tmp1 = array[j];
8574 tmp2 = array[j + stride];
8575 ...
8576 vectemp = {tmp1, tmp2, ...}
8577 */
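      /* A concrete instance of the above (editorial, with made-up numbers):
         for an int array accessed with stride 3, a V4SI vectype and a
         vectorization factor of 4, each vector iteration performs

             tmp1 = array[j];
             tmp2 = array[j + 3];
             tmp3 = array[j + 6];
             tmp4 = array[j + 9];
             vectemp = {tmp1, tmp2, tmp3, tmp4};

         and the induction variable advances by 4*3 elements (IVSTEP below
         is the byte step multiplied by VF).  */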
8578
8579 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8580 build_int_cst (TREE_TYPE (stride_step), vf));
8581
8582 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8583
8584 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8585 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8586 create_iv (stride_base, ivstep, NULL,
8587 loop, &incr_gsi, insert_after,
8588 &offvar, NULL);
8589
8590 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8591
8592 running_off = offvar;
8593 alias_off = build_int_cst (ref_type, 0);
8594 int nloads = const_nunits;
8595 int lnel = 1;
8596 tree ltype = TREE_TYPE (vectype);
8597 tree lvectype = vectype;
8598 auto_vec<tree> dr_chain;
8599 if (memory_access_type == VMAT_STRIDED_SLP)
8600 {
8601 if (group_size < const_nunits)
8602 {
8603 /* First check if vec_init optab supports construction from vector
8604 elts directly. Otherwise avoid emitting a constructor of
8605 vector elements by performing the loads using an integer type
8606 of the same size, constructing a vector of those and then
8607 re-interpreting it as the original vector type. This avoids a
8608 huge runtime penalty due to the general inability to perform
8609 store forwarding from smaller stores to a larger load. */
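              /* A sketch of the effect (editorial; the exact modes depend on
                 the target): with a V4SI vectype and a group of two we either
                 emit two V2SI loads feeding the constructor directly (when
                 vec_init supports vector elements) or two DImode loads that
                 are composed into a V2DI and then VIEW_CONVERTed to V4SI.  */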
8610 tree ptype;
8611 tree vtype
8612 = vector_vector_composition_type (vectype,
8613 const_nunits / group_size,
8614 &ptype);
8615 if (vtype != NULL_TREE)
8616 {
8617 nloads = const_nunits / group_size;
8618 lnel = group_size;
8619 lvectype = vtype;
8620 ltype = ptype;
8621 }
8622 }
8623 else
8624 {
8625 nloads = 1;
8626 lnel = const_nunits;
8627 ltype = vectype;
8628 }
8629 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8630 }
8634 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
8632 else if (nloads == 1)
8633 ltype = vectype;
8634
8635 if (slp)
8636 {
8637 /* For SLP permutation support we need to load the whole group,
8638 not only the number of vector stmts the permutation result
8639 fits in. */
8640 if (slp_perm)
8641 {
8642 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8643 variable VF. */
8644 unsigned int const_vf = vf.to_constant ();
8645 ncopies = CEIL (group_size * const_vf, const_nunits);
8646 dr_chain.create (ncopies);
8647 }
8648 else
8649 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8650 }
8651 unsigned int group_el = 0;
8652 unsigned HOST_WIDE_INT
8653 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8654 for (j = 0; j < ncopies; j++)
8655 {
8656 if (nloads > 1)
8657 vec_alloc (v, nloads);
8658 gimple *new_stmt = NULL;
8659 for (i = 0; i < nloads; i++)
8660 {
8661 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8662 group_el * elsz + cst_offset);
8663 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8664 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8665 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8667 if (nloads > 1)
8668 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8669 gimple_assign_lhs (new_stmt));
8670
8671 group_el += lnel;
8672 if (! slp
8673 || group_el == group_size)
8674 {
8675 tree newoff = copy_ssa_name (running_off);
8676 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8677 running_off, stride_step);
8678 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8679
8680 running_off = newoff;
8681 group_el = 0;
8682 }
8683 }
8684 if (nloads > 1)
8685 {
8686 tree vec_inv = build_constructor (lvectype, v);
8687 new_temp = vect_init_vector (vinfo, stmt_info,
8688 vec_inv, lvectype, gsi);
8689 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8690 if (lvectype != vectype)
8691 {
8692 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8693 VIEW_CONVERT_EXPR,
8694 build1 (VIEW_CONVERT_EXPR,
8695 vectype, new_temp));
8696 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8697 }
8698 }
8699
8700 if (slp)
8701 {
8702 if (slp_perm)
8703 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8704 else
8705 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8706 }
8707 else
8708 {
8709 if (j == 0)
8710 *vec_stmt = new_stmt;
8711 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8712 }
8713 }
8714 if (slp_perm)
8715 {
8716 unsigned n_perms;
8717 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8718 false, &n_perms);
8719 }
8720 return true;
8721 }
8722
8723 if (memory_access_type == VMAT_GATHER_SCATTER
8724 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8725 grouped_load = false;
8726
8727 if (grouped_load)
8728 {
8729 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8730 group_size = DR_GROUP_SIZE (first_stmt_info);
8731 /* For SLP vectorization we directly vectorize a subchain
8732 without permutation. */
8733 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8734 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8735 /* For BB vectorization always use the first stmt to base
8736 the data ref pointer on. */
8737 if (bb_vinfo)
8738 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8739
8740 /* Check if the chain of loads is already vectorized. */
8741 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8742 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8743 ??? But we can only do so if there is exactly one
8744 as we have no way to get at the rest. Leave the CSE
8745 opportunity alone.
8746 ??? With the group load eventually participating
8747 in multiple different permutations (having multiple
8748 slp nodes which refer to the same group) the CSE
8749 is even wrong code. See PR56270. */
8750 && !slp)
8751 {
8752 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8753 return true;
8754 }
8755 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8756 group_gap_adj = 0;
8757
8758 /* VEC_NUM is the number of vect stmts to be created for this group. */
8759 if (slp)
8760 {
8761 grouped_load = false;
8762 /* If an SLP permutation is from N elements to N elements,
8763 and if one vector holds a whole number of N, we can load
8764 the inputs to the permutation in the same way as an
8765 unpermuted sequence. In other cases we need to load the
8766 whole group, not only the number of vector stmts the
8767 permutation result fits in. */
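	  /* For illustration (editorial, with made-up sizes): a two-lane
	     permutation of a group of two with V4SI (4 is a multiple of 2)
	     loads its input vectors exactly like an unpermuted access and
	     permutes afterwards, whereas a three-lane permutation of a group
	     of three with V4SI must load the whole group, giving
	     VEC_NUM = CEIL (3 * VF / 4) with GROUP_GAP_ADJ recording the
	     adjustment for the extra elements loaded.  */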
8768 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8769 if (slp_perm
8770 && (group_size != scalar_lanes
8771 || !multiple_p (nunits, group_size)))
8772 {
8773 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8774 variable VF; see vect_transform_slp_perm_load. */
8775 unsigned int const_vf = vf.to_constant ();
8776 unsigned int const_nunits = nunits.to_constant ();
8777 vec_num = CEIL (group_size * const_vf, const_nunits);
8778 group_gap_adj = vf * group_size - nunits * vec_num;
8779 }
8780 else
8781 {
8782 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8783 group_gap_adj
8784 = group_size - scalar_lanes;
8785 }
8786 }
8787 else
8788 vec_num = group_size;
8789
8790 ref_type = get_group_alias_ptr_type (first_stmt_info);
8791 }
8792 else
8793 {
8794 first_stmt_info = stmt_info;
8795 first_dr_info = dr_info;
8796 group_size = vec_num = 1;
8797 group_gap_adj = 0;
8798 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8799 }
8800
8804 8801 /* Gather-scatter accesses perform only component accesses; alignment
8802 is irrelevant for them. */
8803 if (memory_access_type == VMAT_GATHER_SCATTER)
8804 alignment_support_scheme = dr_unaligned_supported;
8805 else
8806 alignment_support_scheme
8807 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8808
8809 gcc_assert (alignment_support_scheme);
8810 vec_loop_masks *loop_masks
8811 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8812 ? &LOOP_VINFO_MASKS (loop_vinfo)
8813 : NULL);
8817 8814 /* Targets with load-lane instructions must not require explicit
8815 realignment. vect_supportable_dr_alignment always returns either
8816 dr_aligned or dr_unaligned_supported for masked operations. */
8817 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8818 && !mask
8819 && !loop_masks)
8820 || alignment_support_scheme == dr_aligned
8821 || alignment_support_scheme == dr_unaligned_supported);
8822
8823 /* In case the vectorization factor (VF) is bigger than the number
8824 of elements that we can fit in a vectype (nunits), we have to generate
8825 more than one vector stmt - i.e - we need to "unroll" the
8826 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8827 from one copy of the vector stmt to the next, in the field
8828 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8829 stages to find the correct vector defs to be used when vectorizing
8830 stmts that use the defs of the current stmt. The example below
8831 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8832 need to create 4 vectorized stmts):
8833
8834 before vectorization:
8835 RELATED_STMT VEC_STMT
8836 S1: x = memref - -
8837 S2: z = x + 1 - -
8838
8839 step 1: vectorize stmt S1:
8840 We first create the vector stmt VS1_0, and, as usual, record a
8841 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8842 Next, we create the vector stmt VS1_1, and record a pointer to
8843 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8844 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8845 stmts and pointers:
8846 RELATED_STMT VEC_STMT
8847 VS1_0: vx0 = memref0 VS1_1 -
8848 VS1_1: vx1 = memref1 VS1_2 -
8849 VS1_2: vx2 = memref2 VS1_3 -
8850 VS1_3: vx3 = memref3 - -
8851 S1: x = load - VS1_0
8852 S2: z = x + 1 - -
8853 */
8854
8855 /* In case of interleaving (non-unit grouped access):
8856
8857 S1: x2 = &base + 2
8858 S2: x0 = &base
8859 S3: x1 = &base + 1
8860 S4: x3 = &base + 3
8861
8862 Vectorized loads are created in the order of memory accesses
8863 starting from the access of the first stmt of the chain:
8864
8865 VS1: vx0 = &base
8866 VS2: vx1 = &base + vec_size*1
8867 VS3: vx3 = &base + vec_size*2
8868 VS4: vx4 = &base + vec_size*3
8869
8870 Then permutation statements are generated:
8871
8872 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8873 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8874 ...
8875
8876 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8877 (the order of the data-refs in the output of vect_permute_load_chain
8878 corresponds to the order of scalar stmts in the interleaving chain - see
8879 the documentation of vect_permute_load_chain()).
8880 The generation of permutation stmts and recording them in
8881 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8882
8883 In case of both multiple types and interleaving, the vector loads and
8884 permutation stmts above are created for every copy. The result vector
8885 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8886 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8887
8888 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8889 on a target that supports unaligned accesses (dr_unaligned_supported)
8890 we generate the following code:
8891 p = initial_addr;
8892 indx = 0;
8893 loop {
8894 p = p + indx * vectype_size;
8895 vec_dest = *(p);
8896 indx = indx + 1;
8897 }
8898
8899 Otherwise, the data reference is potentially unaligned on a target that
8900 does not support unaligned accesses (dr_explicit_realign_optimized) -
8901 then generate the following code, in which the data in each iteration is
8902 obtained by two vector loads, one from the previous iteration, and one
8903 from the current iteration:
8904 p1 = initial_addr;
8905 msq_init = *(floor(p1))
8906 p2 = initial_addr + VS - 1;
8907 realignment_token = call target_builtin;
8908 indx = 0;
8909 loop {
8910 p2 = p2 + indx * vectype_size
8911 lsq = *(floor(p2))
8912 vec_dest = realign_load (msq, lsq, realignment_token)
8913 indx = indx + 1;
8914 msq = lsq;
8915 } */
8916
8917 /* If the misalignment remains the same throughout the execution of the
8918 loop, we can create the init_addr and permutation mask at the loop
8919 preheader. Otherwise, it needs to be created inside the loop.
8920 This can only occur when vectorizing memory accesses in the inner-loop
8921 nested within an outer-loop that is being vectorized. */
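  /* An editorial sketch of the situation: when outer-loop vectorizing

	 for (i = 0; i < n; i++)      <-- loop being vectorized
	   for (j = 0; j < m; j++)
	     ... = a[j][i];

     the access moves by a whole row of A per inner iteration; unless that
     step is known to be a multiple of the vector size, the misalignment
     differs between iterations and the realignment data has to be computed
     inside the loop.  */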
8922
8923 if (nested_in_vect_loop
8924 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8925 GET_MODE_SIZE (TYPE_MODE (vectype))))
8926 {
8927 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8928 compute_in_loop = true;
8929 }
8930
8931 bool diff_first_stmt_info
8932 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
8933
8934 if ((alignment_support_scheme == dr_explicit_realign_optimized
8935 || alignment_support_scheme == dr_explicit_realign)
8936 && !compute_in_loop)
8937 {
8941 8938 /* If we have a different first_stmt_info, we can't set up realignment
8942 8939 here, since we can't guarantee that first_stmt_info's DR has been
8943 8940 initialized yet; use first_stmt_info_for_drptr's DR instead, bumping it
8944 8941 by the distance from first_stmt_info's DR as below. */
8942 if (!diff_first_stmt_info)
8943 msq = vect_setup_realignment (vinfo,
8944 first_stmt_info, gsi, &realignment_token,
8945 alignment_support_scheme, NULL_TREE,
8946 &at_loop);
8947 if (alignment_support_scheme == dr_explicit_realign_optimized)
8948 {
8949 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8950 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8951 size_one_node);
8952 gcc_assert (!first_stmt_info_for_drptr);
8953 }
8954 }
8955 else
8956 at_loop = loop;
8957
8958 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8959 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8960
8961 tree bump;
8962 tree vec_offset = NULL_TREE;
8963 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8964 {
8965 aggr_type = NULL_TREE;
8966 bump = NULL_TREE;
8967 }
8968 else if (memory_access_type == VMAT_GATHER_SCATTER)
8969 {
8970 aggr_type = elem_type;
8971 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8972 &bump, &vec_offset);
8973 }
8974 else
8975 {
8976 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8977 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8978 else
8979 aggr_type = vectype;
8980 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8981 memory_access_type);
8982 }
8983
8984 vec<tree> vec_offsets = vNULL;
8985 auto_vec<tree> vec_masks;
8986 if (mask)
8987 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
8988 mask, &vec_masks, mask_vectype, NULL_TREE);
8989 tree vec_mask = NULL_TREE;
8990 poly_uint64 group_elt = 0;
8991 for (j = 0; j < ncopies; j++)
8992 {
8993 /* 1. Create the vector or array pointer update chain. */
8994 if (j == 0)
8995 {
8996 bool simd_lane_access_p
8997 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8998 if (simd_lane_access_p
8999 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9000 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9001 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9002 && integer_zerop (DR_INIT (first_dr_info->dr))
9003 && alias_sets_conflict_p (get_alias_set (aggr_type),
9004 get_alias_set (TREE_TYPE (ref_type)))
9005 && (alignment_support_scheme == dr_aligned
9006 || alignment_support_scheme == dr_unaligned_supported))
9007 {
9008 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9009 dataref_offset = build_int_cst (ref_type, 0);
9010 }
9011 else if (diff_first_stmt_info)
9012 {
9013 dataref_ptr
9014 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9015 aggr_type, at_loop, offset, &dummy,
9016 gsi, &ptr_incr, simd_lane_access_p,
9017 byte_offset, bump);
9018 /* Adjust the pointer by the difference to first_stmt. */
9019 data_reference_p ptrdr
9020 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9021 tree diff
9022 = fold_convert (sizetype,
9023 size_binop (MINUS_EXPR,
9024 DR_INIT (first_dr_info->dr),
9025 DR_INIT (ptrdr)));
9026 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9027 stmt_info, diff);
9028 if (alignment_support_scheme == dr_explicit_realign)
9029 {
9030 msq = vect_setup_realignment (vinfo,
9031 first_stmt_info_for_drptr, gsi,
9032 &realignment_token,
9033 alignment_support_scheme,
9034 dataref_ptr, &at_loop);
9035 gcc_assert (!compute_in_loop);
9036 }
9037 }
9038 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9039 {
9040 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9041 &dataref_ptr, &vec_offsets, ncopies);
9042 vec_offset = vec_offsets[0];
9043 }
9044 else
9045 dataref_ptr
9046 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9047 at_loop,
9048 offset, &dummy, gsi, &ptr_incr,
9049 simd_lane_access_p,
9050 byte_offset, bump);
9051 if (mask)
9052 vec_mask = vec_masks[0];
9053 }
9054 else
9055 {
9056 if (dataref_offset)
9057 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9058 bump);
9059 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9060 vec_offset = vec_offsets[j];
9061 else
9062 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9063 stmt_info, bump);
9064 if (mask)
9065 vec_mask = vec_masks[j];
9066 }
9067
9068 if (grouped_load || slp_perm)
9069 dr_chain.create (vec_num);
9070
9071 gimple *new_stmt = NULL;
9072 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9073 {
9074 tree vec_array;
9075
9076 vec_array = create_vector_array (vectype, vec_num);
9077
9078 tree final_mask = NULL_TREE;
9079 if (loop_masks)
9080 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9081 vectype, j);
9082 if (vec_mask)
9083 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9084 vec_mask, gsi);
9085
9086 gcall *call;
9087 if (final_mask)
9088 {
9089 /* Emit:
9090 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9091 VEC_MASK). */
9092 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9093 tree alias_ptr = build_int_cst (ref_type, align);
9094 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9095 dataref_ptr, alias_ptr,
9096 final_mask);
9097 }
9098 else
9099 {
9100 /* Emit:
9101 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9102 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9103 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9104 }
9105 gimple_call_set_lhs (call, vec_array);
9106 gimple_call_set_nothrow (call, true);
9107 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9108 new_stmt = call;
9109
9110 /* Extract each vector into an SSA_NAME. */
9111 for (i = 0; i < vec_num; i++)
9112 {
9113 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9114 vec_array, i);
9115 dr_chain.quick_push (new_temp);
9116 }
9117
9118 /* Record the mapping between SSA_NAMEs and statements. */
9119 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9120
9121 /* Record that VEC_ARRAY is now dead. */
9122 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9123 }
9124 else
9125 {
9126 for (i = 0; i < vec_num; i++)
9127 {
9128 tree final_mask = NULL_TREE;
9129 if (loop_masks
9130 && memory_access_type != VMAT_INVARIANT)
9131 final_mask = vect_get_loop_mask (gsi, loop_masks,
9132 vec_num * ncopies,
9133 vectype, vec_num * j + i);
9134 if (vec_mask)
9135 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9136 vec_mask, gsi);
9137
9138 if (i > 0)
9139 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9140 gsi, stmt_info, bump);
9141
9142 /* 2. Create the vector-load in the loop. */
9143 switch (alignment_support_scheme)
9144 {
9145 case dr_aligned:
9146 case dr_unaligned_supported:
9147 {
9148 unsigned int misalign;
9149 unsigned HOST_WIDE_INT align;
9150
9151 if (memory_access_type == VMAT_GATHER_SCATTER)
9152 {
9153 tree zero = build_zero_cst (vectype);
9154 tree scale = size_int (gs_info.scale);
9155 gcall *call;
9156 if (loop_masks)
9157 call = gimple_build_call_internal
9158 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9159 vec_offset, scale, zero, final_mask);
9160 else
9161 call = gimple_build_call_internal
9162 (IFN_GATHER_LOAD, 4, dataref_ptr,
9163 vec_offset, scale, zero);
9164 gimple_call_set_nothrow (call, true);
9165 new_stmt = call;
9166 data_ref = NULL_TREE;
9167 break;
9168 }
9169
9170 align =
9171 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9172 if (alignment_support_scheme == dr_aligned)
9173 {
9174 gcc_assert (aligned_access_p (first_dr_info));
9175 misalign = 0;
9176 }
9177 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9178 {
9179 align = dr_alignment
9180 (vect_dr_behavior (vinfo, first_dr_info));
9181 misalign = 0;
9182 }
9183 else
9184 misalign = DR_MISALIGNMENT (first_dr_info);
9185 if (dataref_offset == NULL_TREE
9186 && TREE_CODE (dataref_ptr) == SSA_NAME)
9187 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9188 align, misalign);
9189
9190 if (final_mask)
9191 {
9192 align = least_bit_hwi (misalign | align);
9193 tree ptr = build_int_cst (ref_type, align);
9194 gcall *call
9195 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9196 dataref_ptr, ptr,
9197 final_mask);
9198 gimple_call_set_nothrow (call, true);
9199 new_stmt = call;
9200 data_ref = NULL_TREE;
9201 }
9202 else
9203 {
9204 tree ltype = vectype;
9205 tree new_vtype = NULL_TREE;
9206 unsigned HOST_WIDE_INT gap
9207 = DR_GROUP_GAP (first_stmt_info);
9208 unsigned int vect_align
9209 = vect_known_alignment_in_bytes (first_dr_info);
9210 unsigned int scalar_dr_size
9211 = vect_get_scalar_dr_size (first_dr_info);
9212 /* If there's no peeling for gaps but we have a gap
9216 9213 with SLP loads, then load the lower half of the
9214 vector only. See get_group_load_store_type for
9215 when we apply this optimization. */
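		    /* An editorial example with made-up sizes: for a V4SI
		       vectype and an interleaved group of four with
		       DR_GROUP_GAP == 2, only the first two elements are ever
		       read, so (when a suitable composition type exists) we
		       load just the lower half and fill the rest of the
		       vector with zeros via the CONSTRUCTOR built below.  */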
9216 if (slp
9217 && loop_vinfo
9218 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9219 && gap != 0
9220 && known_eq (nunits, (group_size - gap) * 2)
9221 && known_eq (nunits, group_size)
9222 && gap >= (vect_align / scalar_dr_size))
9223 {
9224 tree half_vtype;
9225 new_vtype
9226 = vector_vector_composition_type (vectype, 2,
9227 &half_vtype);
9228 if (new_vtype != NULL_TREE)
9229 ltype = half_vtype;
9230 }
9231 tree offset
9232 = (dataref_offset ? dataref_offset
9233 : build_int_cst (ref_type, 0));
9234 if (ltype != vectype
9235 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9236 {
9237 unsigned HOST_WIDE_INT gap_offset
9238 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9239 tree gapcst = build_int_cst (ref_type, gap_offset);
9240 offset = size_binop (PLUS_EXPR, offset, gapcst);
9241 }
9242 data_ref
9243 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9244 if (alignment_support_scheme == dr_aligned)
9245 ;
9246 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9247 TREE_TYPE (data_ref)
9248 = build_aligned_type (TREE_TYPE (data_ref),
9249 align * BITS_PER_UNIT);
9250 else
9251 TREE_TYPE (data_ref)
9252 = build_aligned_type (TREE_TYPE (data_ref),
9253 TYPE_ALIGN (elem_type));
9254 if (ltype != vectype)
9255 {
9256 vect_copy_ref_info (data_ref,
9257 DR_REF (first_dr_info->dr));
9258 tree tem = make_ssa_name (ltype);
9259 new_stmt = gimple_build_assign (tem, data_ref);
9260 vect_finish_stmt_generation (vinfo, stmt_info,
9261 new_stmt, gsi);
9262 data_ref = NULL;
9263 vec<constructor_elt, va_gc> *v;
9264 vec_alloc (v, 2);
9265 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9266 {
9267 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9268 build_zero_cst (ltype));
9269 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9270 }
9271 else
9272 {
9273 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9274 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9275 build_zero_cst (ltype));
9276 }
9277 gcc_assert (new_vtype != NULL_TREE);
9278 if (new_vtype == vectype)
9279 new_stmt = gimple_build_assign (
9280 vec_dest, build_constructor (vectype, v));
9281 else
9282 {
9283 tree new_vname = make_ssa_name (new_vtype);
9284 new_stmt = gimple_build_assign (
9285 new_vname, build_constructor (new_vtype, v));
9286 vect_finish_stmt_generation (vinfo, stmt_info,
9287 new_stmt, gsi);
9288 new_stmt = gimple_build_assign (
9289 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9290 new_vname));
9291 }
9292 }
9293 }
9294 break;
9295 }
9296 case dr_explicit_realign:
9297 {
9298 tree ptr, bump;
9299
9300 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9301
9302 if (compute_in_loop)
9303 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9304 &realignment_token,
9305 dr_explicit_realign,
9306 dataref_ptr, NULL);
9307
9308 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9309 ptr = copy_ssa_name (dataref_ptr);
9310 else
9311 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9312 // For explicit realign the target alignment should be
9313 // known at compile time.
9314 unsigned HOST_WIDE_INT align =
9315 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9316 new_stmt = gimple_build_assign
9317 (ptr, BIT_AND_EXPR, dataref_ptr,
9318 build_int_cst
9319 (TREE_TYPE (dataref_ptr),
9320 -(HOST_WIDE_INT) align));
9321 vect_finish_stmt_generation (vinfo, stmt_info,
9322 new_stmt, gsi);
9323 data_ref
9324 = build2 (MEM_REF, vectype, ptr,
9325 build_int_cst (ref_type, 0));
9326 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9327 vec_dest = vect_create_destination_var (scalar_dest,
9328 vectype);
9329 new_stmt = gimple_build_assign (vec_dest, data_ref);
9330 new_temp = make_ssa_name (vec_dest, new_stmt);
9331 gimple_assign_set_lhs (new_stmt, new_temp);
9332 gimple_move_vops (new_stmt, stmt_info->stmt);
9333 vect_finish_stmt_generation (vinfo, stmt_info,
9334 new_stmt, gsi);
9335 msq = new_temp;
9336
9337 bump = size_binop (MULT_EXPR, vs,
9338 TYPE_SIZE_UNIT (elem_type));
9339 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9340 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9341 stmt_info, bump);
9342 new_stmt = gimple_build_assign
9343 (NULL_TREE, BIT_AND_EXPR, ptr,
9344 build_int_cst
9345 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9346 ptr = copy_ssa_name (ptr, new_stmt);
9347 gimple_assign_set_lhs (new_stmt, ptr);
9348 vect_finish_stmt_generation (vinfo, stmt_info,
9349 new_stmt, gsi);
9350 data_ref
9351 = build2 (MEM_REF, vectype, ptr,
9352 build_int_cst (ref_type, 0));
9353 break;
9354 }
9355 case dr_explicit_realign_optimized:
9356 {
9357 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9358 new_temp = copy_ssa_name (dataref_ptr);
9359 else
9360 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9361 // We should only be doing this if we know the target
9362 // alignment at compile time.
9363 unsigned HOST_WIDE_INT align =
9364 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9365 new_stmt = gimple_build_assign
9366 (new_temp, BIT_AND_EXPR, dataref_ptr,
9367 build_int_cst (TREE_TYPE (dataref_ptr),
9368 -(HOST_WIDE_INT) align));
9369 vect_finish_stmt_generation (vinfo, stmt_info,
9370 new_stmt, gsi);
9371 data_ref
9372 = build2 (MEM_REF, vectype, new_temp,
9373 build_int_cst (ref_type, 0));
9374 break;
9375 }
9376 default:
9377 gcc_unreachable ();
9378 }
9379 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9380 /* DATA_REF is null if we've already built the statement. */
9381 if (data_ref)
9382 {
9383 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9384 new_stmt = gimple_build_assign (vec_dest, data_ref);
9385 }
9386 new_temp = make_ssa_name (vec_dest, new_stmt);
9387 gimple_set_lhs (new_stmt, new_temp);
9388 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9389
9390 /* 3. Handle explicit realignment if necessary/supported.
9391 Create in loop:
9392 vec_dest = realign_load (msq, lsq, realignment_token) */
9393 if (alignment_support_scheme == dr_explicit_realign_optimized
9394 || alignment_support_scheme == dr_explicit_realign)
9395 {
9396 lsq = gimple_assign_lhs (new_stmt);
9397 if (!realignment_token)
9398 realignment_token = dataref_ptr;
9399 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9400 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9401 msq, lsq, realignment_token);
9402 new_temp = make_ssa_name (vec_dest, new_stmt);
9403 gimple_assign_set_lhs (new_stmt, new_temp);
9404 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9405
9406 if (alignment_support_scheme == dr_explicit_realign_optimized)
9407 {
9408 gcc_assert (phi);
9409 if (i == vec_num - 1 && j == ncopies - 1)
9410 add_phi_arg (phi, lsq,
9411 loop_latch_edge (containing_loop),
9412 UNKNOWN_LOCATION);
9413 msq = lsq;
9414 }
9415 }
9416
9417 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9418 {
9419 tree perm_mask = perm_mask_for_reverse (vectype);
9420 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9421 perm_mask, stmt_info, gsi);
9422 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9423 }
9424
9425 /* Collect vector loads and later create their permutation in
9426 vect_transform_grouped_load (). */
9427 if (grouped_load || slp_perm)
9428 dr_chain.quick_push (new_temp);
9429
9430 /* Store vector loads in the corresponding SLP_NODE. */
9431 if (slp && !slp_perm)
9432 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9433
9437 9434 /* With an SLP permutation we load the gaps as well; without one
9438 9435 we need to skip the gaps after we manage to fully load
9439 9436 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9437 group_elt += nunits;
9438 if (maybe_ne (group_gap_adj, 0U)
9439 && !slp_perm
9440 && known_eq (group_elt, group_size - group_gap_adj))
9441 {
9442 poly_wide_int bump_val
9443 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9444 * group_gap_adj);
9445 tree bump = wide_int_to_tree (sizetype, bump_val);
9446 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9447 gsi, stmt_info, bump);
9448 group_elt = 0;
9449 }
9450 }
9451 /* Bump the vector pointer to account for a gap or for excess
9452 elements loaded for a permuted SLP load. */
9453 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9454 {
9455 poly_wide_int bump_val
9456 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9457 * group_gap_adj);
9458 tree bump = wide_int_to_tree (sizetype, bump_val);
9459 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9460 stmt_info, bump);
9461 }
9462 }
9463
9464 if (slp && !slp_perm)
9465 continue;
9466
9467 if (slp_perm)
9468 {
9469 unsigned n_perms;
9470 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9471 gsi, vf, false, &n_perms);
9472 gcc_assert (ok);
9473 }
9474 else
9475 {
9476 if (grouped_load)
9477 {
9478 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9479 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9480 group_size, gsi);
9481 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9482 }
9483 else
9484 {
9485 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9486 }
9487 }
9488 dr_chain.release ();
9489 }
9490 if (!slp)
9491 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9492
9493 return true;
9494 }
9495
9496 /* Function vect_is_simple_cond.
9497
9498 Input:
9499 LOOP - the loop that is being vectorized.
9500 COND - Condition that is checked for simple use.
9501
9502 Output:
9503 *COMP_VECTYPE - the vector type for the comparison.
9504 *DTS - The def types for the arguments of the comparison
9505
9506 Returns whether a COND can be vectorized. Checks whether
9510 9507 condition operands are supportable using vect_is_simple_use. */
9508
9509 static bool
9510 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9511 slp_tree slp_node, tree *comp_vectype,
9512 enum vect_def_type *dts, tree vectype)
9513 {
9514 tree lhs, rhs;
9515 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9516 slp_tree slp_op;
9517
9518 /* Mask case. */
9519 if (TREE_CODE (cond) == SSA_NAME
9520 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9521 {
9522 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9523 &slp_op, &dts[0], comp_vectype)
9524 || !*comp_vectype
9525 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9526 return false;
9527 return true;
9528 }
9529
9530 if (!COMPARISON_CLASS_P (cond))
9531 return false;
9532
9533 lhs = TREE_OPERAND (cond, 0);
9534 rhs = TREE_OPERAND (cond, 1);
9535
9536 if (TREE_CODE (lhs) == SSA_NAME)
9537 {
9538 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9539 &lhs, &slp_op, &dts[0], &vectype1))
9540 return false;
9541 }
9542 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9543 || TREE_CODE (lhs) == FIXED_CST)
9544 dts[0] = vect_constant_def;
9545 else
9546 return false;
9547
9548 if (TREE_CODE (rhs) == SSA_NAME)
9549 {
9550 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9551 &rhs, &slp_op, &dts[1], &vectype2))
9552 return false;
9553 }
9554 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9555 || TREE_CODE (rhs) == FIXED_CST)
9556 dts[1] = vect_constant_def;
9557 else
9558 return false;
9559
9560 if (vectype1 && vectype2
9561 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9562 TYPE_VECTOR_SUBPARTS (vectype2)))
9563 return false;
9564
9565 *comp_vectype = vectype1 ? vectype1 : vectype2;
9566 /* Invariant comparison. */
9567 if (! *comp_vectype)
9568 {
9569 tree scalar_type = TREE_TYPE (lhs);
9570 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9571 *comp_vectype = truth_type_for (vectype);
9572 else
9573 {
9574 /* If we can widen the comparison to match vectype do so. */
9575 if (INTEGRAL_TYPE_P (scalar_type)
9576 && !slp_node
9577 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9578 TYPE_SIZE (TREE_TYPE (vectype))))
9579 scalar_type = build_nonstandard_integer_type
9580 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9581 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9582 slp_node);
9583 }
9584 }
9585
9586 return true;
9587 }
9588
9589 /* vectorizable_condition.
9590
9594 9591 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9592 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9593 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9594 at GSI.
9595
9596 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9597
9598 Return true if STMT_INFO is vectorizable in this way. */
9599
9600 static bool
9601 vectorizable_condition (vec_info *vinfo,
9602 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9603 gimple **vec_stmt,
9604 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9605 {
9606 tree scalar_dest = NULL_TREE;
9607 tree vec_dest = NULL_TREE;
9608 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9609 tree then_clause, else_clause;
9610 tree comp_vectype = NULL_TREE;
9611 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9612 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9613 tree vec_compare;
9614 tree new_temp;
9615 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9616 enum vect_def_type dts[4]
9617 = {vect_unknown_def_type, vect_unknown_def_type,
9618 vect_unknown_def_type, vect_unknown_def_type};
9619 int ndts = 4;
9620 int ncopies;
9621 int vec_num;
9622 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9623 int i;
9624 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9625 vec<tree> vec_oprnds0 = vNULL;
9626 vec<tree> vec_oprnds1 = vNULL;
9627 vec<tree> vec_oprnds2 = vNULL;
9628 vec<tree> vec_oprnds3 = vNULL;
9629 tree vec_cmp_type;
9630 bool masked = false;
9631
9632 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9633 return false;
9634
9635 /* Is vectorizable conditional operation? */
9636 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9637 if (!stmt)
9638 return false;
9639
9640 code = gimple_assign_rhs_code (stmt);
9641 if (code != COND_EXPR)
9642 return false;
9643
9644 stmt_vec_info reduc_info = NULL;
9645 int reduc_index = -1;
9646 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9647 bool for_reduction
9648 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9649 if (for_reduction)
9650 {
9651 if (STMT_SLP_TYPE (stmt_info))
9652 return false;
9653 reduc_info = info_for_reduction (vinfo, stmt_info);
9654 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9655 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9656 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9657 || reduc_index != -1);
9658 }
9659 else
9660 {
9661 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9662 return false;
9663
9664 /* FORNOW: only supported as part of a reduction. */
9665 if (STMT_VINFO_LIVE_P (stmt_info))
9666 {
9667 if (dump_enabled_p ())
9668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9669 "value used after loop.\n");
9670 return false;
9671 }
9672 }
9673
9674 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9675 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9676
9677 if (slp_node)
9678 {
9679 ncopies = 1;
9680 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9681 }
9682 else
9683 {
9684 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9685 vec_num = 1;
9686 }
9687
9688 gcc_assert (ncopies >= 1);
9689 if (for_reduction && ncopies > 1)
9690 return false; /* FORNOW */
9691
9692 cond_expr = gimple_assign_rhs1 (stmt);
9693
9694 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9695 &comp_vectype, &dts[0], vectype)
9696 || !comp_vectype)
9697 return false;
9698
9699 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9700 slp_tree then_slp_node, else_slp_node;
9701 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9702 &then_clause, &then_slp_node, &dts[2], &vectype1))
9703 return false;
9704 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9705 &else_clause, &else_slp_node, &dts[3], &vectype2))
9706 return false;
9707
9708 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9709 return false;
9710
9711 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9712 return false;
9713
9714 masked = !COMPARISON_CLASS_P (cond_expr);
9715 vec_cmp_type = truth_type_for (comp_vectype);
9716
9717 if (vec_cmp_type == NULL_TREE)
9718 return false;
9719
9720 cond_code = TREE_CODE (cond_expr);
9721 if (!masked)
9722 {
9723 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9724 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9725 }
9726
9727 /* For conditional reductions, the "then" value needs to be the candidate
9728 value calculated by this iteration while the "else" value needs to be
9729 the result carried over from previous iterations. If the COND_EXPR
9730 is the other way around, we need to swap it. */
9731 bool must_invert_cmp_result = false;
9732 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9733 {
9734 if (masked)
9735 must_invert_cmp_result = true;
9736 else
9737 {
9738 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9739 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9740 if (new_code == ERROR_MARK)
9741 must_invert_cmp_result = true;
9742 else
9743 {
9744 cond_code = new_code;
9745 /* Make sure we don't accidentally use the old condition. */
9746 cond_expr = NULL_TREE;
9747 }
9748 }
9749 std::swap (then_clause, else_clause);
9750 }
9751
9752 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9753 {
9754 /* Boolean values may have another representation in vectors
9755 and therefore we prefer bit operations over comparison for
9756 them (which also works for scalar masks). We store opcodes
9757 to use in bitop1 and bitop2. Statement is vectorized as
9758 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9759 depending on bitop1 and bitop2 arity. */
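      /* A concrete illustration (editorial): for boolean operands A > B is
	 emitted as A & ~B (bitop1 = BIT_NOT_EXPR applied to B, then
	 bitop2 = BIT_AND_EXPR), while A == B would be ~(A ^ B); because
	 bitop2 is BIT_NOT_EXPR in that case, the transform below keeps
	 A ^ B and swaps the then and else arms instead.  */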
9760 switch (cond_code)
9761 {
9762 case GT_EXPR:
9763 bitop1 = BIT_NOT_EXPR;
9764 bitop2 = BIT_AND_EXPR;
9765 break;
9766 case GE_EXPR:
9767 bitop1 = BIT_NOT_EXPR;
9768 bitop2 = BIT_IOR_EXPR;
9769 break;
9770 case LT_EXPR:
9771 bitop1 = BIT_NOT_EXPR;
9772 bitop2 = BIT_AND_EXPR;
9773 std::swap (cond_expr0, cond_expr1);
9774 break;
9775 case LE_EXPR:
9776 bitop1 = BIT_NOT_EXPR;
9777 bitop2 = BIT_IOR_EXPR;
9778 std::swap (cond_expr0, cond_expr1);
9779 break;
9780 case NE_EXPR:
9781 bitop1 = BIT_XOR_EXPR;
9782 break;
9783 case EQ_EXPR:
9784 bitop1 = BIT_XOR_EXPR;
9785 bitop2 = BIT_NOT_EXPR;
9786 break;
9787 default:
9788 return false;
9789 }
9790 cond_code = SSA_NAME;
9791 }
9792
9793 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9794 && reduction_type == EXTRACT_LAST_REDUCTION
9795 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9796 {
9797 if (dump_enabled_p ())
9798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9799 "reduction comparison operation not supported.\n");
9800 return false;
9801 }
9802
9803 if (!vec_stmt)
9804 {
9805 if (bitop1 != NOP_EXPR)
9806 {
9807 machine_mode mode = TYPE_MODE (comp_vectype);
9808 optab optab;
9809
9810 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9811 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9812 return false;
9813
9814 if (bitop2 != NOP_EXPR)
9815 {
9816 optab = optab_for_tree_code (bitop2, comp_vectype,
9817 optab_default);
9818 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9819 return false;
9820 }
9821 }
9822
9823 vect_cost_for_stmt kind = vector_stmt;
9824 if (reduction_type == EXTRACT_LAST_REDUCTION)
9825 /* Count one reduction-like operation per vector. */
9826 kind = vec_to_scalar;
9827 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
9828 return false;
9829
9830 if (slp_node
9831 && (!vect_maybe_update_slp_op_vectype
9832 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
9833 || (op_adjust == 1
9834 && !vect_maybe_update_slp_op_vectype
9835 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
9836 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
9837 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
9838 {
9839 if (dump_enabled_p ())
9840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9841 "incompatible vector types for invariants\n");
9842 return false;
9843 }
9844
9845 if (loop_vinfo
9846 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
9847 && reduction_type == EXTRACT_LAST_REDUCTION)
9848 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
9849 ncopies * vec_num, vectype, NULL);
9850
9851 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9852 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
9853 cost_vec, kind);
9854 return true;
9855 }
9856
9857 /* Transform. */
9858
9859 if (!slp_node)
9860 {
9861 vec_oprnds0.create (1);
9862 vec_oprnds1.create (1);
9863 vec_oprnds2.create (1);
9864 vec_oprnds3.create (1);
9865 }
9866
9867 /* Handle def. */
9868 scalar_dest = gimple_assign_lhs (stmt);
9869 if (reduction_type != EXTRACT_LAST_REDUCTION)
9870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9871
9872 bool swap_cond_operands = false;
9873
9874 /* See whether another part of the vectorized code applies a loop
9875 mask to the condition, or to its inverse. */
9876
9877 vec_loop_masks *masks = NULL;
9878 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
9879 {
9880 if (reduction_type == EXTRACT_LAST_REDUCTION)
9881 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9882 else
9883 {
9884 scalar_cond_masked_key cond (cond_expr, ncopies);
9885 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
9886 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9887 else
9888 {
9889 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
9890 cond.code = invert_tree_comparison (cond.code, honor_nans);
9891 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
9892 {
9893 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9894 cond_code = cond.code;
9895 swap_cond_operands = true;
9896 }
9897 }
9898 }
9899 }
9900
9901 /* Handle cond expr. */
9902 if (masked)
9903 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9904 cond_expr, &vec_oprnds0, comp_vectype,
9905 then_clause, &vec_oprnds2, vectype,
9906 reduction_type != EXTRACT_LAST_REDUCTION
9907 ? else_clause : NULL, &vec_oprnds3, vectype);
9908 else
9909 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9910 cond_expr0, &vec_oprnds0, comp_vectype,
9911 cond_expr1, &vec_oprnds1, comp_vectype,
9912 then_clause, &vec_oprnds2, vectype,
9913 reduction_type != EXTRACT_LAST_REDUCTION
9914 ? else_clause : NULL, &vec_oprnds3, vectype);
9915
9916 /* Arguments are ready. Create the new vector stmt. */
9917 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9918 {
9919 vec_then_clause = vec_oprnds2[i];
9920 vec_else_clause = vec_oprnds3[i];
9921
9922 if (swap_cond_operands)
9923 std::swap (vec_then_clause, vec_else_clause);
9924
9925 if (masked)
9926 vec_compare = vec_cond_lhs;
9927 else
9928 {
9929 vec_cond_rhs = vec_oprnds1[i];
9930 if (bitop1 == NOP_EXPR)
9931 vec_compare = build2 (cond_code, vec_cmp_type,
9932 vec_cond_lhs, vec_cond_rhs);
9933 else
9934 {
9935 new_temp = make_ssa_name (vec_cmp_type);
9936 gassign *new_stmt;
9937 if (bitop1 == BIT_NOT_EXPR)
9938 new_stmt = gimple_build_assign (new_temp, bitop1,
9939 vec_cond_rhs);
9940 else
9941 new_stmt
9942 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9943 vec_cond_rhs);
9944 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9945 if (bitop2 == NOP_EXPR)
9946 vec_compare = new_temp;
9947 else if (bitop2 == BIT_NOT_EXPR)
9948 {
9949 /* Instead of doing ~x ? y : z do x ? z : y. */
9950 vec_compare = new_temp;
9951 std::swap (vec_then_clause, vec_else_clause);
9952 }
9953 else
9954 {
9955 vec_compare = make_ssa_name (vec_cmp_type);
9956 new_stmt
9957 = gimple_build_assign (vec_compare, bitop2,
9958 vec_cond_lhs, new_temp);
9959 vect_finish_stmt_generation (vinfo, stmt_info,
9960 new_stmt, gsi);
9961 }
9962 }
9963 }
9964
9965 /* If we decided to apply a loop mask to the result of the vector
9966 comparison, AND the comparison with the mask now. Later passes
9970 9967 should then be able to reuse the AND results between multiple
9968 vector statements.
9969
9970 For example:
9971 for (int i = 0; i < 100; ++i)
9972 x[i] = y[i] ? z[i] : 10;
9973
9974 results in following optimized GIMPLE:
9975
9976 mask__35.8_43 = vect__4.7_41 != { 0, ... };
9977 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
9978 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
9979 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
9980 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
9981 vect_iftmp.11_47, { 10, ... }>;
9982
9986 9983 instead of using masked and unmasked forms of
9984 vec != { 0, ... } (masked in the MASK_LOAD,
9985 unmasked in the VEC_COND_EXPR). */
9986
9987 /* Force vec_compare to be an SSA_NAME rather than a comparison,
9988 in cases where that's necessary. */
9989
9990 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
9991 {
9992 if (!is_gimple_val (vec_compare))
9993 {
9994 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9995 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9996 vec_compare);
9997 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9998 vec_compare = vec_compare_name;
9999 }
10000
10001 if (must_invert_cmp_result)
10002 {
10003 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10004 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10005 BIT_NOT_EXPR,
10006 vec_compare);
10007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10008 vec_compare = vec_compare_name;
10009 }
10010
10011 if (masks)
10012 {
10013 unsigned vec_num = vec_oprnds0.length ();
10014 tree loop_mask
10015 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10016 vectype, i);
10017 tree tmp2 = make_ssa_name (vec_cmp_type);
10018 gassign *g
10019 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10020 loop_mask);
10021 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10022 vec_compare = tmp2;
10023 }
10024 }
10025
10026 gimple *new_stmt;
10027 if (reduction_type == EXTRACT_LAST_REDUCTION)
10028 {
10029 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10030 tree lhs = gimple_get_lhs (old_stmt);
10031 new_stmt = gimple_build_call_internal
10032 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10033 vec_then_clause);
10034 gimple_call_set_lhs (new_stmt, lhs);
10035 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10036 if (old_stmt == gsi_stmt (*gsi))
10037 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10038 else
10039 {
10040 /* In this case we're moving the definition to later in the
10041 block. That doesn't matter because the only uses of the
10042 lhs are in phi statements. */
10043 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10044 gsi_remove (&old_gsi, true);
10045 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10046 }
10047 }
10048 else
10049 {
10050 new_temp = make_ssa_name (vec_dest);
10051 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10052 vec_then_clause, vec_else_clause);
10053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10054 }
10055 if (slp_node)
10056 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10057 else
10058 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10059 }
10060
10061 if (!slp_node)
10062 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10063
10064 vec_oprnds0.release ();
10065 vec_oprnds1.release ();
10066 vec_oprnds2.release ();
10067 vec_oprnds3.release ();
10068
10069 return true;
10070 }
10071
10072 /* vectorizable_comparison.
10073
10077 10074 Check if STMT_INFO is a comparison expression that can be vectorized.
10075 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10076 comparison, put it in VEC_STMT, and insert it at GSI.
10077
10078 Return true if STMT_INFO is vectorizable in this way. */
10079
10080 static bool
10081 vectorizable_comparison (vec_info *vinfo,
10082 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10083 gimple **vec_stmt,
10084 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10085 {
10086 tree lhs, rhs1, rhs2;
10087 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10089 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10090 tree new_temp;
10091 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10092 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10093 int ndts = 2;
10094 poly_uint64 nunits;
10095 int ncopies;
10096 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10097 int i;
10098 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10099 vec<tree> vec_oprnds0 = vNULL;
10100 vec<tree> vec_oprnds1 = vNULL;
10101 tree mask_type;
10102 tree mask;
10103
10104 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10105 return false;
10106
10107 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10108 return false;
10109
10110 mask_type = vectype;
10111 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10112
10113 if (slp_node)
10114 ncopies = 1;
10115 else
10116 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10117
10118 gcc_assert (ncopies >= 1);
10119 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10120 return false;
10121
10122 if (STMT_VINFO_LIVE_P (stmt_info))
10123 {
10124 if (dump_enabled_p ())
10125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10126 "value used after loop.\n");
10127 return false;
10128 }
10129
10130 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10131 if (!stmt)
10132 return false;
10133
10134 code = gimple_assign_rhs_code (stmt);
10135
10136 if (TREE_CODE_CLASS (code) != tcc_comparison)
10137 return false;
10138
10139 slp_tree slp_rhs1, slp_rhs2;
10140 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10141 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10142 return false;
10143
10144 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10145 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10146 return false;
10147
10148 if (vectype1 && vectype2
10149 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10150 TYPE_VECTOR_SUBPARTS (vectype2)))
10151 return false;
10152
10153 vectype = vectype1 ? vectype1 : vectype2;
10154
10155 /* Invariant comparison. */
10156 if (!vectype)
10157 {
10158 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10159 vectype = mask_type;
10160 else
10161 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10162 slp_node);
10163 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10164 return false;
10165 }
10166 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10167 return false;
10168
10169 /* Can't compare mask and non-mask types. */
10170 if (vectype1 && vectype2
10171 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10172 return false;
10173
10174 /* Boolean values may have another representation in vectors
10175 and therefore we prefer bit operations over comparison for
10176 them (which also works for scalar masks). We store opcodes
10177 to use in bitop1 and bitop2. Statement is vectorized as
10178 BITOP2 (rhs1 BITOP1 rhs2) or
10179 rhs1 BITOP2 (BITOP1 rhs2)
10180 depending on bitop1 and bitop2 arity. */
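  /* An editorial example: with boolean vector operands A < B is emitted as
     B & ~A (the operands are swapped via SWAP_P and the GT bitops applied),
     and A == B is emitted literally as ~(A ^ B) using two statements
     below.  */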
10181 bool swap_p = false;
10182 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10183 {
10184 if (code == GT_EXPR)
10185 {
10186 bitop1 = BIT_NOT_EXPR;
10187 bitop2 = BIT_AND_EXPR;
10188 }
10189 else if (code == GE_EXPR)
10190 {
10191 bitop1 = BIT_NOT_EXPR;
10192 bitop2 = BIT_IOR_EXPR;
10193 }
10194 else if (code == LT_EXPR)
10195 {
10196 bitop1 = BIT_NOT_EXPR;
10197 bitop2 = BIT_AND_EXPR;
10198 swap_p = true;
10199 }
10200 else if (code == LE_EXPR)
10201 {
10202 bitop1 = BIT_NOT_EXPR;
10203 bitop2 = BIT_IOR_EXPR;
10204 swap_p = true;
10205 }
10206 else
10207 {
10208 bitop1 = BIT_XOR_EXPR;
10209 if (code == EQ_EXPR)
10210 bitop2 = BIT_NOT_EXPR;
10211 }
10212 }
10213
10214 if (!vec_stmt)
10215 {
10216 if (bitop1 == NOP_EXPR)
10217 {
10218 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10219 return false;
10220 }
10221 else
10222 {
10223 machine_mode mode = TYPE_MODE (vectype);
10224 optab optab;
10225
10226 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10227 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10228 return false;
10229
10230 if (bitop2 != NOP_EXPR)
10231 {
10232 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10233 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10234 return false;
10235 }
10236 }
10237
10238 /* Put types on constant and invariant SLP children. */
10239 if (slp_node
10240 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10241 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10242 {
10243 if (dump_enabled_p ())
10244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10245 "incompatible vector types for invariants\n");
10246 return false;
10247 }
10248
10249 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10250 vect_model_simple_cost (vinfo, stmt_info,
10251 ncopies * (1 + (bitop2 != NOP_EXPR)),
10252 dts, ndts, slp_node, cost_vec);
10253 return true;
10254 }
10255
10256 /* Transform. */
10257 if (!slp_node)
10258 {
10259 vec_oprnds0.create (1);
10260 vec_oprnds1.create (1);
10261 }
10262
10263 /* Handle def. */
10264 lhs = gimple_assign_lhs (stmt);
10265 mask = vect_create_destination_var (lhs, mask_type);
10266
10267 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10268 rhs1, &vec_oprnds0, vectype,
10269 rhs2, &vec_oprnds1, vectype);
10270 if (swap_p)
10271 std::swap (vec_oprnds0, vec_oprnds1);
10272
10273 /* Arguments are ready. Create the new vector stmt. */
10274 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10275 {
10276 gimple *new_stmt;
10277 vec_rhs2 = vec_oprnds1[i];
10278
10279 new_temp = make_ssa_name (mask);
10280 if (bitop1 == NOP_EXPR)
10281 {
10282 new_stmt = gimple_build_assign (new_temp, code,
10283 vec_rhs1, vec_rhs2);
10284 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10285 }
10286 else
10287 {
10288 if (bitop1 == BIT_NOT_EXPR)
10289 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10290 else
10291 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10292 vec_rhs2);
10293 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10294 if (bitop2 != NOP_EXPR)
10295 {
10296 tree res = make_ssa_name (mask);
10297 if (bitop2 == BIT_NOT_EXPR)
10298 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10299 else
10300 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10301 new_temp);
10302 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10303 }
10304 }
10305 if (slp_node)
10306 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10307 else
10308 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10309 }
10310
10311 if (!slp_node)
10312 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10313
10314 vec_oprnds0.release ();
10315 vec_oprnds1.release ();
10316
10317 return true;
10318 }
10319
10320 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10321 can handle all live statements in the node. Otherwise return true
10322 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10323 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10324
10325 static bool
10326 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10327 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10328 slp_tree slp_node, slp_instance slp_node_instance,
10329 bool vec_stmt_p,
10330 stmt_vector_for_cost *cost_vec)
10331 {
10332 if (slp_node)
10333 {
10334 stmt_vec_info slp_stmt_info;
10335 unsigned int i;
10336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10337 {
10338 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10339 && !vectorizable_live_operation (loop_vinfo,
10340 slp_stmt_info, gsi, slp_node,
10341 slp_node_instance, i,
10342 vec_stmt_p, cost_vec))
10343 return false;
10344 }
10345 }
10346 else if (STMT_VINFO_LIVE_P (stmt_info)
10347 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10348 slp_node, slp_node_instance, -1,
10349 vec_stmt_p, cost_vec))
10350 return false;
10351
10352 return true;
10353 }
10354
10355 /* Make sure the statement is vectorizable. */
10356
10357 opt_result
10358 vect_analyze_stmt (vec_info *vinfo,
10359 stmt_vec_info stmt_info, bool *need_to_vectorize,
10360 slp_tree node, slp_instance node_instance,
10361 stmt_vector_for_cost *cost_vec)
10362 {
10363 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10364 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10365 bool ok;
10366 gimple_seq pattern_def_seq;
10367
10368 if (dump_enabled_p ())
10369 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10370 stmt_info->stmt);
10371
10372 if (gimple_has_volatile_ops (stmt_info->stmt))
10373 return opt_result::failure_at (stmt_info->stmt,
10374 "not vectorized:"
10375 " stmt has volatile operands: %G\n",
10376 stmt_info->stmt);
10377
10378 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10379 && node == NULL
10380 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10381 {
10382 gimple_stmt_iterator si;
10383
10384 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10385 {
10386 stmt_vec_info pattern_def_stmt_info
10387 = vinfo->lookup_stmt (gsi_stmt (si));
10388 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10389 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10390 {
10391 /* Analyze def stmt of STMT if it's a pattern stmt. */
10392 if (dump_enabled_p ())
10393 dump_printf_loc (MSG_NOTE, vect_location,
10394 "==> examining pattern def statement: %G",
10395 pattern_def_stmt_info->stmt);
10396
10397 opt_result res
10398 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10399 need_to_vectorize, node, node_instance,
10400 cost_vec);
10401 if (!res)
10402 return res;
10403 }
10404 }
10405 }
10406
10407 /* Skip stmts that do not need to be vectorized. In loops this is expected
10408 to include:
10409 - the COND_EXPR which is the loop exit condition
10410 - any LABEL_EXPRs in the loop
10411 - computations that are used only for array indexing or loop control.
10412 In basic blocks we only analyze statements that are a part of some SLP
10413 instance, therefore, all the statements are relevant.
10414
10415 A pattern statement needs to be analyzed instead of the original statement
10416 if the original statement is not relevant. Otherwise, we analyze both
10417 statements. In basic blocks we are called from some SLP instance
10418 traversal, so don't analyze pattern stmts instead; the pattern stmts
10419 are already part of the SLP instance. */
10420
10421 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10422 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10423 && !STMT_VINFO_LIVE_P (stmt_info))
10424 {
10425 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10426 && pattern_stmt_info
10427 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10428 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10429 {
10430 /* Analyze PATTERN_STMT instead of the original stmt. */
10431 stmt_info = pattern_stmt_info;
10432 if (dump_enabled_p ())
10433 dump_printf_loc (MSG_NOTE, vect_location,
10434 "==> examining pattern statement: %G",
10435 stmt_info->stmt);
10436 }
10437 else
10438 {
10439 if (dump_enabled_p ())
10440 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10441
10442 return opt_result::success ();
10443 }
10444 }
10445 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10446 && node == NULL
10447 && pattern_stmt_info
10448 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10449 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10450 {
10451 /* Analyze PATTERN_STMT too. */
10452 if (dump_enabled_p ())
10453 dump_printf_loc (MSG_NOTE, vect_location,
10454 "==> examining pattern statement: %G",
10455 pattern_stmt_info->stmt);
10456
10457 opt_result res
10458 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10459 node_instance, cost_vec);
10460 if (!res)
10461 return res;
10462 }
10463
10464 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10465 {
10466 case vect_internal_def:
10467 break;
10468
10469 case vect_reduction_def:
10470 case vect_nested_cycle:
10471 gcc_assert (!bb_vinfo
10472 && (relevance == vect_used_in_outer
10473 || relevance == vect_used_in_outer_by_reduction
10474 || relevance == vect_used_by_reduction
10475 || relevance == vect_unused_in_scope
10476 || relevance == vect_used_only_live));
10477 break;
10478
10479 case vect_induction_def:
10480 gcc_assert (!bb_vinfo);
10481 break;
10482
10483 case vect_constant_def:
10484 case vect_external_def:
10485 case vect_unknown_def_type:
10486 default:
10487 gcc_unreachable ();
10488 }
10489
10490 if (STMT_VINFO_RELEVANT_P (stmt_info))
10491 {
10492 tree type = gimple_expr_type (stmt_info->stmt);
10493 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10494 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10495 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10496 || (call && gimple_call_lhs (call) == NULL_TREE));
10497 *need_to_vectorize = true;
10498 }
10499
10500 if (PURE_SLP_STMT (stmt_info) && !node)
10501 {
10502 if (dump_enabled_p ())
10503 dump_printf_loc (MSG_NOTE, vect_location,
10504 "handled only by SLP analysis\n");
10505 return opt_result::success ();
10506 }
10507
10508 ok = true;
10509 if (!bb_vinfo
10510 && (STMT_VINFO_RELEVANT_P (stmt_info)
10511 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10512 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10513 -mveclibabi= takes preference over library functions with
10514 the simd attribute. */
10515 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10516 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10517 cost_vec)
10518 || vectorizable_conversion (vinfo, stmt_info,
10519 NULL, NULL, node, cost_vec)
10520 || vectorizable_operation (vinfo, stmt_info,
10521 NULL, NULL, node, cost_vec)
10522 || vectorizable_assignment (vinfo, stmt_info,
10523 NULL, NULL, node, cost_vec)
10524 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10525 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10526 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10527 node, node_instance, cost_vec)
10528 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10529 NULL, NULL, node, cost_vec)
10530 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10531 || vectorizable_condition (vinfo, stmt_info,
10532 NULL, NULL, node, cost_vec)
10533 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10534 cost_vec)
10535 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10536 stmt_info, NULL, node));
10537 else
10538 {
10539 if (bb_vinfo)
10540 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10541 || vectorizable_simd_clone_call (vinfo, stmt_info,
10542 NULL, NULL, node, cost_vec)
10543 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10544 cost_vec)
10545 || vectorizable_shift (vinfo, stmt_info,
10546 NULL, NULL, node, cost_vec)
10547 || vectorizable_operation (vinfo, stmt_info,
10548 NULL, NULL, node, cost_vec)
10549 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10550 cost_vec)
10551 || vectorizable_load (vinfo, stmt_info,
10552 NULL, NULL, node, cost_vec)
10553 || vectorizable_store (vinfo, stmt_info,
10554 NULL, NULL, node, cost_vec)
10555 || vectorizable_condition (vinfo, stmt_info,
10556 NULL, NULL, node, cost_vec)
10557 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10558 cost_vec));
10559 }
10560
10561 if (!ok)
10562 return opt_result::failure_at (stmt_info->stmt,
10563 "not vectorized:"
10564 " relevant stmt not supported: %G",
10565 stmt_info->stmt);
10566
10567 /* Stmts that are (also) "live" (i.e. used outside of the loop)
10568 need extra handling, except for vectorizable reductions. */
10569 if (!bb_vinfo
10570 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10571 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10572 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10573 stmt_info, NULL, node, node_instance,
10574 false, cost_vec))
10575 return opt_result::failure_at (stmt_info->stmt,
10576 "not vectorized:"
10577 " live stmt not supported: %G",
10578 stmt_info->stmt);
10579
10580 return opt_result::success ();
10581 }
10582
10583
10584 /* Function vect_transform_stmt.
10585
10586 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10587
10588 bool
10589 vect_transform_stmt (vec_info *vinfo,
10590 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10591 slp_tree slp_node, slp_instance slp_node_instance)
10592 {
10593 bool is_store = false;
10594 gimple *vec_stmt = NULL;
10595 bool done;
10596
10597 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10598
10599 switch (STMT_VINFO_TYPE (stmt_info))
10600 {
10601 case type_demotion_vec_info_type:
10602 case type_promotion_vec_info_type:
10603 case type_conversion_vec_info_type:
10604 done = vectorizable_conversion (vinfo, stmt_info,
10605 gsi, &vec_stmt, slp_node, NULL);
10606 gcc_assert (done);
10607 break;
10608
10609 case induc_vec_info_type:
10610 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10611 stmt_info, gsi, &vec_stmt, slp_node,
10612 NULL);
10613 gcc_assert (done);
10614 break;
10615
10616 case shift_vec_info_type:
10617 done = vectorizable_shift (vinfo, stmt_info,
10618 gsi, &vec_stmt, slp_node, NULL);
10619 gcc_assert (done);
10620 break;
10621
10622 case op_vec_info_type:
10623 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10624 NULL);
10625 gcc_assert (done);
10626 break;
10627
10628 case assignment_vec_info_type:
10629 done = vectorizable_assignment (vinfo, stmt_info,
10630 gsi, &vec_stmt, slp_node, NULL);
10631 gcc_assert (done);
10632 break;
10633
10634 case load_vec_info_type:
10635 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10636 NULL);
10637 gcc_assert (done);
10638 break;
10639
10640 case store_vec_info_type:
10641 done = vectorizable_store (vinfo, stmt_info,
10642 gsi, &vec_stmt, slp_node, NULL);
10643 gcc_assert (done);
10644 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10645 {
10646 /* In case of interleaving, the whole chain is vectorized when the
10647 last store in the chain is reached. Store stmts before the last
10648 one are skipped, and their stmt_vec_info must not be freed
10649 in the meantime. */
10650 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10651 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10652 is_store = true;
10653 }
10654 else
10655 is_store = true;
10656 break;
10657
10658 case condition_vec_info_type:
10659 done = vectorizable_condition (vinfo, stmt_info,
10660 gsi, &vec_stmt, slp_node, NULL);
10661 gcc_assert (done);
10662 break;
10663
10664 case comparison_vec_info_type:
10665 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10666 slp_node, NULL);
10667 gcc_assert (done);
10668 break;
10669
10670 case call_vec_info_type:
10671 done = vectorizable_call (vinfo, stmt_info,
10672 gsi, &vec_stmt, slp_node, NULL);
10673 break;
10674
10675 case call_simd_clone_vec_info_type:
10676 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10677 slp_node, NULL);
10678 break;
10679
10680 case reduc_vec_info_type:
10681 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10682 gsi, &vec_stmt, slp_node);
10683 gcc_assert (done);
10684 break;
10685
10686 case cycle_phi_info_type:
10687 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10688 &vec_stmt, slp_node, slp_node_instance);
10689 gcc_assert (done);
10690 break;
10691
10692 case lc_phi_info_type:
10693 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10694 stmt_info, &vec_stmt, slp_node);
10695 gcc_assert (done);
10696 break;
10697
10698 default:
10699 if (!STMT_VINFO_LIVE_P (stmt_info))
10700 {
10701 if (dump_enabled_p ())
10702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10703 "stmt not supported.\n");
10704 gcc_unreachable ();
10705 }
10706 done = true;
10707 }
10708
10709 if (!slp_node && vec_stmt)
10710 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10711
10712 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10713 return is_store;
10714
10715 /* If this stmt defines a value used on a backedge, update the
10716 vectorized PHIs. */
10717 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
10718 stmt_vec_info reduc_info;
10719 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
10720 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
10721 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
10722 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
10723 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
10724 {
10725 gphi *phi;
10726 edge e;
10727 if (!slp_node
10728 && (phi = dyn_cast <gphi *>
10729 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
10730 && dominated_by_p (CDI_DOMINATORS,
10731 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
10732 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
10733 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
10734 == gimple_get_lhs (orig_stmt_info->stmt)))
10735 {
10736 vec<gimple *> &phi_info
10737 = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
10738 vec<gimple *> &vec_stmt
10739 = STMT_VINFO_VEC_STMTS (stmt_info);
10740 gcc_assert (phi_info.length () == vec_stmt.length ());
10741 for (unsigned i = 0; i < phi_info.length (); ++i)
10742 add_phi_arg (as_a <gphi *> (phi_info[i]),
10743 gimple_get_lhs (vec_stmt[i]), e,
10744 gimple_phi_arg_location (phi, e->dest_idx));
10745 }
10746 else if (slp_node
10747 && slp_node != slp_node_instance->reduc_phis)
10748 {
10749 slp_tree phi_node = slp_node_instance->reduc_phis;
10750 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
10751 e = loop_latch_edge (gimple_bb (phi)->loop_father);
10752 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
10753 == SLP_TREE_VEC_STMTS (slp_node).length ());
10754 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
10755 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
10756 vect_get_slp_vect_def (slp_node, i),
10757 e, gimple_phi_arg_location (phi, e->dest_idx));
10758 }
10759 }
10760
10761 /* Handle stmts whose DEF is used outside the loop-nest that is
10762 being vectorized. */
10763 if (is_a <loop_vec_info> (vinfo))
10764 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10765 stmt_info, gsi, slp_node,
10766 slp_node_instance, true, NULL);
10767 gcc_assert (done);
10768
10769 return false;
10770 }
10771
10772
10773 /* Remove a group of stores (for SLP or interleaving), free their
10774 stmt_vec_info. */
10775
10776 void
10777 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10778 {
10779 stmt_vec_info next_stmt_info = first_stmt_info;
10780
10781 while (next_stmt_info)
10782 {
10783 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10784 next_stmt_info = vect_orig_stmt (next_stmt_info);
10785 /* Free the attached stmt_vec_info and remove the stmt. */
10786 vinfo->remove_stmt (next_stmt_info);
10787 next_stmt_info = tmp;
10788 }
10789 }
10790
10791 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10792 elements of type SCALAR_TYPE, or null if the target doesn't support
10793 such a type.
10794
10795 If NUNITS is zero, return a vector type that contains elements of
10796 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10797
10798 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10799 for this vectorization region and want to "autodetect" the best choice.
10800 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10801 and we want the new type to be interoperable with it. PREVAILING_MODE
10802 in this case can be a scalar integer mode or a vector mode; when it
10803 is a vector mode, the function acts like a tree-level version of
10804 related_vector_mode. */
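/* As a sketch (the concrete modes depend on the target): with
   PREVAILING_MODE == VOIDmode and NUNITS == 0 this returns the vector type
   built from the target's preferred SIMD mode for SCALAR_TYPE's mode, e.g.
   a four-element vector of ints when the preferred vector of SImode is
   128 bits wide.  With a prevailing vector mode and a nonzero NUNITS,
   a related vector mode with NUNITS elements is looked up instead.  */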
10805
10806 tree
10807 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10808 tree scalar_type, poly_uint64 nunits)
10809 {
10810 tree orig_scalar_type = scalar_type;
10811 scalar_mode inner_mode;
10812 machine_mode simd_mode;
10813 tree vectype;
10814
10815 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10816 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10817 return NULL_TREE;
10818
10819 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10820
10821 /* For vector types of elements whose mode precision doesn't
10822 match their type's precision we use an element type of mode
10823 precision. The vectorization routines will have to make sure
10824 they support the proper result truncation/extension.
10825 We also make sure to build vector types with INTEGER_TYPE
10826 component type only. */
10827 if (INTEGRAL_TYPE_P (scalar_type)
10828 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10829 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10830 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10831 TYPE_UNSIGNED (scalar_type));
10832
10833 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10834 When the component mode passes the above test simply use a type
10835 corresponding to that mode. The theory is that any use that
10836 would cause problems with this will disable vectorization anyway. */
10837 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10838 && !INTEGRAL_TYPE_P (scalar_type))
10839 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10840
10841 /* We can't build a vector type of elements with alignment bigger than
10842 their size. */
10843 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10844 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10845 TYPE_UNSIGNED (scalar_type));
10846
10847 /* If we fell back to using the mode, fail if there was
10848 no scalar type for it. */
10849 if (scalar_type == NULL_TREE)
10850 return NULL_TREE;
10851
10852 /* If no prevailing mode was supplied, use the mode the target prefers.
10853 Otherwise lookup a vector mode based on the prevailing mode. */
10854 if (prevailing_mode == VOIDmode)
10855 {
10856 gcc_assert (known_eq (nunits, 0U));
10857 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10858 if (SCALAR_INT_MODE_P (simd_mode))
10859 {
10860 /* Traditional behavior is not to take the integer mode
10861 literally, but simply to use it as a way of determining
10862 the vector size. It is up to mode_for_vector to decide
10863 what the TYPE_MODE should be.
10864
10865 Note that nunits == 1 is allowed in order to support single
10866 element vector types. */
10867 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
10868 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10869 return NULL_TREE;
10870 }
10871 }
10872 else if (SCALAR_INT_MODE_P (prevailing_mode)
10873 || !related_vector_mode (prevailing_mode,
10874 inner_mode, nunits).exists (&simd_mode))
10875 {
10876 /* Fall back to using mode_for_vector, mostly in the hope of being
10877 able to use an integer mode. */
10878 if (known_eq (nunits, 0U)
10879 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
10880 return NULL_TREE;
10881
10882 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10883 return NULL_TREE;
10884 }
10885
10886 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
10887
10888 /* In cases where the mode was chosen by mode_for_vector, check that
10889 the target actually supports the chosen mode, or that it at least
10890 allows the vector mode to be replaced by a like-sized integer. */
10891 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10892 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10893 return NULL_TREE;
10894
10895 /* Re-attach the address-space qualifier if we canonicalized the scalar
10896 type. */
10897 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10898 return build_qualified_type
10899 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10900
10901 return vectype;
10902 }
10903
10904 /* Function get_vectype_for_scalar_type.
10905
10906 Returns the vector type corresponding to SCALAR_TYPE as supported
10907 by the target. If GROUP_SIZE is nonzero and we're performing BB
10908 vectorization, make sure that the number of elements in the vector
10909 is no bigger than GROUP_SIZE. */
10910
10911 tree
10912 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
10913 unsigned int group_size)
10914 {
10915 /* For BB vectorization, we should always have a group size once we've
10916 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
10917 are tentative requests during things like early data reference
10918 analysis and pattern recognition. */
10919 if (is_a <bb_vec_info> (vinfo))
10920 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
10921 else
10922 group_size = 0;
10923
10924 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
10925 scalar_type);
10926 if (vectype && vinfo->vector_mode == VOIDmode)
10927 vinfo->vector_mode = TYPE_MODE (vectype);
10928
10929 /* Register the natural choice of vector type, before the group size
10930 has been applied. */
10931 if (vectype)
10932 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
10933
10934 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
10935 try again with an explicit number of elements. */
10936 if (vectype
10937 && group_size
10938 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
10939 {
10940 /* Start with the biggest number of units that fits within
10941 GROUP_SIZE and halve it until we find a valid vector type.
10942 Usually either the first attempt will succeed or all will
10943 fail (in the latter case because GROUP_SIZE is too small
10944 for the target), but it's possible that a target could have
10945 a hole between supported vector types.
10946
10947 If GROUP_SIZE is not a power of 2, this has the effect of
10948 trying the largest power of 2 that fits within the group,
10949 even though the group is not a multiple of that vector size.
10950 The BB vectorizer will then try to carve up the group into
10951 smaller pieces. */
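      /* For example, with GROUP_SIZE == 6 the loop below first tries
         nunits == 4 and, if no such vector type exists, retries with
         nunits == 2.  */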
10952 unsigned int nunits = 1 << floor_log2 (group_size);
10953 do
10954 {
10955 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
10956 scalar_type, nunits);
10957 nunits /= 2;
10958 }
10959 while (nunits > 1 && !vectype);
10960 }
10961
10962 return vectype;
10963 }
10964
10965 /* Return the vector type corresponding to SCALAR_TYPE as supported
10966 by the target. NODE, if nonnull, is the SLP tree node that will
10967 use the returned vector type. */
10968
10969 tree
10970 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
10971 {
10972 unsigned int group_size = 0;
10973 if (node)
10974 group_size = SLP_TREE_LANES (node);
10975 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
10976 }
10977
10978 /* Function get_mask_type_for_scalar_type.
10979
10980 Returns the mask type corresponding to a result of comparison
10981 of vectors of specified SCALAR_TYPE as supported by target.
10982 If GROUP_SIZE is nonzero and we're performing BB vectorization,
10983 make sure that the number of elements in the vector is no bigger
10984 than GROUP_SIZE. */
10985
10986 tree
10987 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
10988 unsigned int group_size)
10989 {
10990 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
10991
10992 if (!vectype)
10993 return NULL;
10994
10995 return truth_type_for (vectype);
10996 }
10997
10998 /* Function get_same_sized_vectype
10999
11000 Returns a vector type corresponding to SCALAR_TYPE of size
11001 VECTOR_TYPE if supported by the target. */
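/* For example (a sketch; the result depends on target support): for
   SCALAR_TYPE short and a four-element vector of ints as VECTOR_TYPE,
   the mode sizes give eight units, so the result is an eight-element
   vector of shorts if the target provides one.  */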
11002
11003 tree
11004 get_same_sized_vectype (tree scalar_type, tree vector_type)
11005 {
11006 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11007 return truth_type_for (vector_type);
11008
11009 poly_uint64 nunits;
11010 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11011 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11012 return NULL_TREE;
11013
11014 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11015 scalar_type, nunits);
11016 }
11017
11018 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11019 would not change the chosen vector modes. */
11020
11021 bool
11022 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11023 {
11024 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11025 i != vinfo->used_vector_modes.end (); ++i)
11026 if (!VECTOR_MODE_P (*i)
11027 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11028 return false;
11029 return true;
11030 }
11031
11032 /* Function vect_is_simple_use.
11033
11034 Input:
11035 VINFO - the vect info of the loop or basic block that is being vectorized.
11036 OPERAND - operand in the loop or bb.
11037 Output:
11038 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11039 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11040 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11041 the definition could be anywhere in the function
11042 DT - the type of definition
11043
11044 Returns whether a stmt with OPERAND can be vectorized.
11045 For loops, supportable operands are constants, loop invariants, and operands
11046 that are defined by the current iteration of the loop. Unsupportable
11047 operands are those that are defined by a previous iteration of the loop (as
11048 is the case in reduction/induction computations).
11049 For basic blocks, supportable operands are constants and bb invariants.
11050 For now, operands defined outside the basic block are not supported. */
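/* A minimal usage sketch (hypothetical caller, for illustration):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info, &def_stmt))
       return false;

   where DT then distinguishes constant and external (invariant)
   definitions from definitions inside the vectorized region.  */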
11051
11052 bool
11053 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11054 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11055 {
11056 if (def_stmt_info_out)
11057 *def_stmt_info_out = NULL;
11058 if (def_stmt_out)
11059 *def_stmt_out = NULL;
11060 *dt = vect_unknown_def_type;
11061
11062 if (dump_enabled_p ())
11063 {
11064 dump_printf_loc (MSG_NOTE, vect_location,
11065 "vect_is_simple_use: operand ");
11066 if (TREE_CODE (operand) == SSA_NAME
11067 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11068 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11069 else
11070 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11071 }
11072
11073 if (CONSTANT_CLASS_P (operand))
11074 *dt = vect_constant_def;
11075 else if (is_gimple_min_invariant (operand))
11076 *dt = vect_external_def;
11077 else if (TREE_CODE (operand) != SSA_NAME)
11078 *dt = vect_unknown_def_type;
11079 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11080 *dt = vect_external_def;
11081 else
11082 {
11083 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11084 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11085 if (!stmt_vinfo)
11086 *dt = vect_external_def;
11087 else
11088 {
11089 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11090 def_stmt = stmt_vinfo->stmt;
11091 switch (gimple_code (def_stmt))
11092 {
11093 case GIMPLE_PHI:
11094 case GIMPLE_ASSIGN:
11095 case GIMPLE_CALL:
11096 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11097 break;
11098 default:
11099 *dt = vect_unknown_def_type;
11100 break;
11101 }
11102 if (def_stmt_info_out)
11103 *def_stmt_info_out = stmt_vinfo;
11104 }
11105 if (def_stmt_out)
11106 *def_stmt_out = def_stmt;
11107 }
11108
11109 if (dump_enabled_p ())
11110 {
11111 dump_printf (MSG_NOTE, ", type of def: ");
11112 switch (*dt)
11113 {
11114 case vect_uninitialized_def:
11115 dump_printf (MSG_NOTE, "uninitialized\n");
11116 break;
11117 case vect_constant_def:
11118 dump_printf (MSG_NOTE, "constant\n");
11119 break;
11120 case vect_external_def:
11121 dump_printf (MSG_NOTE, "external\n");
11122 break;
11123 case vect_internal_def:
11124 dump_printf (MSG_NOTE, "internal\n");
11125 break;
11126 case vect_induction_def:
11127 dump_printf (MSG_NOTE, "induction\n");
11128 break;
11129 case vect_reduction_def:
11130 dump_printf (MSG_NOTE, "reduction\n");
11131 break;
11132 case vect_double_reduction_def:
11133 dump_printf (MSG_NOTE, "double reduction\n");
11134 break;
11135 case vect_nested_cycle:
11136 dump_printf (MSG_NOTE, "nested cycle\n");
11137 break;
11138 case vect_unknown_def_type:
11139 dump_printf (MSG_NOTE, "unknown\n");
11140 break;
11141 }
11142 }
11143
11144 if (*dt == vect_unknown_def_type)
11145 {
11146 if (dump_enabled_p ())
11147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11148 "Unsupported pattern.\n");
11149 return false;
11150 }
11151
11152 return true;
11153 }
11154
11155 /* Function vect_is_simple_use.
11156
11157 Same as vect_is_simple_use but also determines the vector operand
11158 type of OPERAND and stores it to *VECTYPE. If the definition of
11159 OPERAND is vect_uninitialized_def, vect_constant_def or
11160 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11161 is responsible for computing the best suited vector type for the
11162 scalar operand. */
11163
11164 bool
11165 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11166 tree *vectype, stmt_vec_info *def_stmt_info_out,
11167 gimple **def_stmt_out)
11168 {
11169 stmt_vec_info def_stmt_info;
11170 gimple *def_stmt;
11171 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11172 return false;
11173
11174 if (def_stmt_out)
11175 *def_stmt_out = def_stmt;
11176 if (def_stmt_info_out)
11177 *def_stmt_info_out = def_stmt_info;
11178
11179 /* Now get a vector type if the def is internal, otherwise supply
11180 NULL_TREE and leave it up to the caller to figure out a proper
11181 type for the use stmt. */
11182 if (*dt == vect_internal_def
11183 || *dt == vect_induction_def
11184 || *dt == vect_reduction_def
11185 || *dt == vect_double_reduction_def
11186 || *dt == vect_nested_cycle)
11187 {
11188 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11189 gcc_assert (*vectype != NULL_TREE);
11190 if (dump_enabled_p ())
11191 dump_printf_loc (MSG_NOTE, vect_location,
11192 "vect_is_simple_use: vectype %T\n", *vectype);
11193 }
11194 else if (*dt == vect_uninitialized_def
11195 || *dt == vect_constant_def
11196 || *dt == vect_external_def)
11197 *vectype = NULL_TREE;
11198 else
11199 gcc_unreachable ();
11200
11201 return true;
11202 }
11203
11204 /* Function vect_is_simple_use.
11205
11206 Same as vect_is_simple_use but determines the operand by operand
11207 position OPERAND from either STMT or SLP_NODE, filling in *OP
11208 and *SLP_DEF (when SLP_NODE is not NULL). */
11209
11210 bool
11211 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11212 unsigned operand, tree *op, slp_tree *slp_def,
11213 enum vect_def_type *dt,
11214 tree *vectype, stmt_vec_info *def_stmt_info_out)
11215 {
11216 if (slp_node)
11217 {
11218 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11219 *slp_def = child;
11220 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11221 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11222 else
11223 {
11224 if (def_stmt_info_out)
11225 *def_stmt_info_out = NULL;
11226 *op = SLP_TREE_SCALAR_OPS (child)[0];
11227 *dt = SLP_TREE_DEF_TYPE (child);
11228 *vectype = SLP_TREE_VECTYPE (child);
11229 return true;
11230 }
11231 }
11232 else
11233 {
11234 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11235 {
11236 if (gimple_assign_rhs_code (ass) == COND_EXPR
11237 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11238 {
11239 if (operand < 2)
11240 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11241 else
11242 *op = gimple_op (ass, operand);
11243 }
11244 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11245 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11246 else
11247 *op = gimple_op (ass, operand + 1);
11248 }
11249 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11250 {
11251 if (gimple_call_internal_p (call)
11252 && internal_store_fn_p (gimple_call_internal_fn (call)))
11253 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11254 (call));
11255 *op = gimple_call_arg (call, operand);
11256 }
11257 else
11258 gcc_unreachable ();
11259 }
11260
11261 /* ??? We might want to update *vectype from *slp_def here, though
11262 when sharing nodes this would prevent unsharing in the caller. */
11263 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11264 }
11265
11266 /* If OP is not NULL and is external or constant, update its vector
11267 type with VECTYPE. Returns true if successful or false if not,
11268 for example when conflicting vector types are present. */
11269
11270 bool
11271 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11272 {
11273 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11274 return true;
11275 if (SLP_TREE_VECTYPE (op))
11276 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11277 SLP_TREE_VECTYPE (op) = vectype;
11278 return true;
11279 }
11280
11281 /* Function supportable_widening_operation
11282
11283 Check whether an operation represented by the code CODE is a
11284 widening operation that is supported by the target platform in
11285 vector form (i.e., when operating on arguments of type VECTYPE_IN
11286 producing a result of type VECTYPE_OUT).
11287
11288 Widening operations we currently support are NOP (CONVERT), FLOAT,
11289 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11290 are supported by the target platform either directly (via vector
11291 tree-codes), or via target builtins.
11292
11293 Output:
11294 - CODE1 and CODE2 are codes of vector operations to be used when
11295 vectorizing the operation, if available.
11296 - MULTI_STEP_CVT determines the number of required intermediate steps in
11297 case of multi-step conversion (like char->short->int - in that case
11298 MULTI_STEP_CVT will be 1).
11299 - INTERM_TYPES contains the intermediate type required to perform the
11300 widening operation (short in the above example). */
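/* For instance (a sketch, not tied to a particular target), widening a
   vector of chars to a vector of ints typically takes two steps:
   char -> short via VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR and then
   short -> int with the same codes on the intermediate vectors, so
   MULTI_STEP_CVT == 1 and INTERM_TYPES holds the vector-of-short type.  */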
11301
11302 bool
11303 supportable_widening_operation (vec_info *vinfo,
11304 enum tree_code code, stmt_vec_info stmt_info,
11305 tree vectype_out, tree vectype_in,
11306 enum tree_code *code1, enum tree_code *code2,
11307 int *multi_step_cvt,
11308 vec<tree> *interm_types)
11309 {
11310 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11311 class loop *vect_loop = NULL;
11312 machine_mode vec_mode;
11313 enum insn_code icode1, icode2;
11314 optab optab1, optab2;
11315 tree vectype = vectype_in;
11316 tree wide_vectype = vectype_out;
11317 enum tree_code c1, c2;
11318 int i;
11319 tree prev_type, intermediate_type;
11320 machine_mode intermediate_mode, prev_mode;
11321 optab optab3, optab4;
11322
11323 *multi_step_cvt = 0;
11324 if (loop_info)
11325 vect_loop = LOOP_VINFO_LOOP (loop_info);
11326
11327 switch (code)
11328 {
11329 case WIDEN_MULT_EXPR:
11330 /* The result of a vectorized widening operation usually requires
11331 two vectors (because the widened results do not fit into one vector).
11332 The generated vector results would normally be expected to be
11333 generated in the same order as in the original scalar computation,
11334 i.e. if 8 results are generated in each vector iteration, they are
11335 to be organized as follows:
11336 vect1: [res1,res2,res3,res4],
11337 vect2: [res5,res6,res7,res8].
11338
11339 However, in the special case that the result of the widening
11340 operation is used in a reduction computation only, the order doesn't
11341 matter (because when vectorizing a reduction we change the order of
11342 the computation). Some targets can take advantage of this and
11343 generate more efficient code. For example, targets like Altivec,
11344 that support widen_mult using a sequence of {mult_even,mult_odd}
11345 generate the following vectors:
11346 vect1: [res1,res3,res5,res7],
11347 vect2: [res2,res4,res6,res8].
11348
11349 When vectorizing outer-loops, we execute the inner-loop sequentially
11350 (each vectorized inner-loop iteration contributes to VF outer-loop
11351 iterations in parallel). We therefore don't allow changing the
11352 order of the computation in the inner-loop during outer-loop
11353 vectorization. */
11354 /* TODO: Another case in which order doesn't *really* matter is when we
11355 widen and then contract again, e.g. (short)((int)x * y >> 8).
11356 Normally, pack_trunc performs an even/odd permute, whereas the
11357 repack from an even/odd expansion would be an interleave, which
11358 would be significantly simpler for e.g. AVX2. */
11359 /* In any case, in order to avoid duplicating the code below, recurse
11360 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11361 are properly set up for the caller. If we fail, we'll continue with
11362 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11363 if (vect_loop
11364 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11365 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11366 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11367 stmt_info, vectype_out,
11368 vectype_in, code1, code2,
11369 multi_step_cvt, interm_types))
11370 {
11371 /* Elements in a vector with vect_used_by_reduction property cannot
11372 be reordered if the use chain with this property does not have the
11373 same operation. One such example is s += a * b, where elements
11374 in a and b cannot be reordered. Here we check if the vector defined
11375 by STMT is only directly used in the reduction statement. */
11376 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11377 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11378 if (use_stmt_info
11379 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11380 return true;
11381 }
11382 c1 = VEC_WIDEN_MULT_LO_EXPR;
11383 c2 = VEC_WIDEN_MULT_HI_EXPR;
11384 break;
11385
11386 case DOT_PROD_EXPR:
11387 c1 = DOT_PROD_EXPR;
11388 c2 = DOT_PROD_EXPR;
11389 break;
11390
11391 case SAD_EXPR:
11392 c1 = SAD_EXPR;
11393 c2 = SAD_EXPR;
11394 break;
11395
11396 case VEC_WIDEN_MULT_EVEN_EXPR:
11397 /* Support the recursion induced just above. */
11398 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11399 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11400 break;
11401
11402 case WIDEN_LSHIFT_EXPR:
11403 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11404 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11405 break;
11406
11407 CASE_CONVERT:
11408 c1 = VEC_UNPACK_LO_EXPR;
11409 c2 = VEC_UNPACK_HI_EXPR;
11410 break;
11411
11412 case FLOAT_EXPR:
11413 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11414 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11415 break;
11416
11417 case FIX_TRUNC_EXPR:
11418 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11419 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11420 break;
11421
11422 default:
11423 gcc_unreachable ();
11424 }
11425
11426 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11427 std::swap (c1, c2);
11428
11429 if (code == FIX_TRUNC_EXPR)
11430 {
11431 /* The signedness is determined from output operand. */
11432 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11433 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11434 }
11435 else if (CONVERT_EXPR_CODE_P (code)
11436 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11437 && VECTOR_BOOLEAN_TYPE_P (vectype)
11438 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11439 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11440 {
11441 /* If the input and result modes are the same, a different optab
11442 is needed where we pass in the number of units in vectype. */
11443 optab1 = vec_unpacks_sbool_lo_optab;
11444 optab2 = vec_unpacks_sbool_hi_optab;
11445 }
11446 else
11447 {
11448 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11449 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11450 }
11451
11452 if (!optab1 || !optab2)
11453 return false;
11454
11455 vec_mode = TYPE_MODE (vectype);
11456 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11457 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11458 return false;
11459
11460 *code1 = c1;
11461 *code2 = c2;
11462
11463 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11464 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11465 {
11466 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11467 return true;
11468 /* For scalar masks we may have different boolean
11469 vector types having the same QImode. Thus we
11470 additionally check the number of elements. */
11471 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11472 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11473 return true;
11474 }
11475
11476 /* Check if it's a multi-step conversion that can be done using intermediate
11477 types. */
11478
11479 prev_type = vectype;
11480 prev_mode = vec_mode;
11481
11482 if (!CONVERT_EXPR_CODE_P (code))
11483 return false;
11484
11485 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11486 intermediate steps in the promotion sequence. We try
11487 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11488 not. */
11489 interm_types->create (MAX_INTERM_CVT_STEPS);
11490 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11491 {
11492 intermediate_mode = insn_data[icode1].operand[0].mode;
11493 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11494 intermediate_type
11495 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11496 else
11497 intermediate_type
11498 = lang_hooks.types.type_for_mode (intermediate_mode,
11499 TYPE_UNSIGNED (prev_type));
11500
11501 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11502 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11503 && intermediate_mode == prev_mode
11504 && SCALAR_INT_MODE_P (prev_mode))
11505 {
11506 /* If the input and result modes are the same, a different optab
11507 is needed where we pass in the number of units in vectype. */
11508 optab3 = vec_unpacks_sbool_lo_optab;
11509 optab4 = vec_unpacks_sbool_hi_optab;
11510 }
11511 else
11512 {
11513 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11514 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11515 }
11516
11517 if (!optab3 || !optab4
11518 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11519 || insn_data[icode1].operand[0].mode != intermediate_mode
11520 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11521 || insn_data[icode2].operand[0].mode != intermediate_mode
11522 || ((icode1 = optab_handler (optab3, intermediate_mode))
11523 == CODE_FOR_nothing)
11524 || ((icode2 = optab_handler (optab4, intermediate_mode))
11525 == CODE_FOR_nothing))
11526 break;
11527
11528 interm_types->quick_push (intermediate_type);
11529 (*multi_step_cvt)++;
11530
11531 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11532 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11533 {
11534 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11535 return true;
11536 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11537 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11538 return true;
11539 }
11540
11541 prev_type = intermediate_type;
11542 prev_mode = intermediate_mode;
11543 }
11544
11545 interm_types->release ();
11546 return false;
11547 }
11548
11549
11550 /* Function supportable_narrowing_operation
11551
11552 Check whether an operation represented by the code CODE is a
11553 narrowing operation that is supported by the target platform in
11554 vector form (i.e., when operating on arguments of type VECTYPE_IN
11555 and producing a result of type VECTYPE_OUT).
11556
11557 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11558 and FLOAT. This function checks if these operations are supported by
11559 the target platform directly via vector tree-codes.
11560
11561 Output:
11562 - CODE1 is the code of a vector operation to be used when
11563 vectorizing the operation, if available.
11564 - MULTI_STEP_CVT determines the number of required intermediate steps in
11565 case of multi-step conversion (like int->short->char - in that case
11566 MULTI_STEP_CVT will be 1).
11567 - INTERM_TYPES contains the intermediate type required to perform the
11568 narrowing operation (short in the above example). */
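/* For instance (a sketch), narrowing a vector of ints to a vector of chars
   typically goes int -> short -> char using VEC_PACK_TRUNC_EXPR at each
   step, so MULTI_STEP_CVT == 1 and INTERM_TYPES holds the vector-of-short
   type.  */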
11569
11570 bool
11571 supportable_narrowing_operation (enum tree_code code,
11572 tree vectype_out, tree vectype_in,
11573 enum tree_code *code1, int *multi_step_cvt,
11574 vec<tree> *interm_types)
11575 {
11576 machine_mode vec_mode;
11577 enum insn_code icode1;
11578 optab optab1, interm_optab;
11579 tree vectype = vectype_in;
11580 tree narrow_vectype = vectype_out;
11581 enum tree_code c1;
11582 tree intermediate_type, prev_type;
11583 machine_mode intermediate_mode, prev_mode;
11584 int i;
11585 bool uns;
11586
11587 *multi_step_cvt = 0;
11588 switch (code)
11589 {
11590 CASE_CONVERT:
11591 c1 = VEC_PACK_TRUNC_EXPR;
11592 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11593 && VECTOR_BOOLEAN_TYPE_P (vectype)
11594 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11595 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11596 optab1 = vec_pack_sbool_trunc_optab;
11597 else
11598 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11599 break;
11600
11601 case FIX_TRUNC_EXPR:
11602 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11603 /* The signedness is determined from output operand. */
11604 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11605 break;
11606
11607 case FLOAT_EXPR:
11608 c1 = VEC_PACK_FLOAT_EXPR;
11609 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11610 break;
11611
11612 default:
11613 gcc_unreachable ();
11614 }
11615
11616 if (!optab1)
11617 return false;
11618
11619 vec_mode = TYPE_MODE (vectype);
11620 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11621 return false;
11622
11623 *code1 = c1;
11624
11625 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11626 {
11627 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11628 return true;
11629 /* For scalar masks we may have different boolean
11630 vector types having the same QImode. Thus we
11631 additionally check the number of elements. */
11632 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11633 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11634 return true;
11635 }
11636
11637 if (code == FLOAT_EXPR)
11638 return false;
11639
11640 /* Check if it's a multi-step conversion that can be done using intermediate
11641 types. */
11642 prev_mode = vec_mode;
11643 prev_type = vectype;
11644 if (code == FIX_TRUNC_EXPR)
11645 uns = TYPE_UNSIGNED (vectype_out);
11646 else
11647 uns = TYPE_UNSIGNED (vectype);
11648
11649 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11650 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11651 costly than signed. */
11652 if (code == FIX_TRUNC_EXPR && uns)
11653 {
11654 enum insn_code icode2;
11655
11656 intermediate_type
11657 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11658 interm_optab
11659 = optab_for_tree_code (c1, intermediate_type, optab_default);
11660 if (interm_optab != unknown_optab
11661 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11662 && insn_data[icode1].operand[0].mode
11663 == insn_data[icode2].operand[0].mode)
11664 {
11665 uns = false;
11666 optab1 = interm_optab;
11667 icode1 = icode2;
11668 }
11669 }
11670
11671 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11672 intermediate steps in the narrowing sequence. We try
11673 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11674 interm_types->create (MAX_INTERM_CVT_STEPS);
11675 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11676 {
11677 intermediate_mode = insn_data[icode1].operand[0].mode;
11678 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11679 intermediate_type
11680 = vect_double_mask_nunits (prev_type, intermediate_mode);
11681 else
11682 intermediate_type
11683 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11684 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11685 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11686 && intermediate_mode == prev_mode
11687 && SCALAR_INT_MODE_P (prev_mode))
11688 interm_optab = vec_pack_sbool_trunc_optab;
11689 else
11690 interm_optab
11691 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11692 optab_default);
11693 if (!interm_optab
11694 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11695 || insn_data[icode1].operand[0].mode != intermediate_mode
11696 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11697 == CODE_FOR_nothing))
11698 break;
11699
11700 interm_types->quick_push (intermediate_type);
11701 (*multi_step_cvt)++;
11702
11703 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11704 {
11705 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11706 return true;
11707 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11708 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11709 return true;
11710 }
11711
11712 prev_mode = intermediate_mode;
11713 prev_type = intermediate_type;
11714 optab1 = interm_optab;
11715 }
11716
11717 interm_types->release ();
11718 return false;
11719 }
11720
11721 /* Generate and return a statement that sets vector mask MASK such that
11722 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
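/* For example, with START_INDEX == 6, END_INDEX == 8 and a four-element
   MASK this yields MASK == { true, true, false, false }: lanes 0 and 1
   satisfy 6 + J < 8, lanes 2 and 3 do not.  */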
11723
11724 gcall *
11725 vect_gen_while (tree mask, tree start_index, tree end_index)
11726 {
11727 tree cmp_type = TREE_TYPE (start_index);
11728 tree mask_type = TREE_TYPE (mask);
11729 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11730 cmp_type, mask_type,
11731 OPTIMIZE_FOR_SPEED));
11732 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11733 start_index, end_index,
11734 build_zero_cst (mask_type));
11735 gimple_call_set_lhs (call, mask);
11736 return call;
11737 }
11738
11739 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11740 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11741
11742 tree
11743 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11744 tree end_index)
11745 {
11746 tree tmp = make_ssa_name (mask_type);
11747 gcall *call = vect_gen_while (tmp, start_index, end_index);
11748 gimple_seq_add_stmt (seq, call);
11749 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11750 }
11751
11752 /* Try to compute the vector types required to vectorize STMT_INFO,
11753 returning true on success and false if vectorization isn't possible.
11754 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11755 make sure that the number of elements in the vectors is no bigger
11756 than GROUP_SIZE.
11757
11758 On success:
11759
11760 - Set *STMT_VECTYPE_OUT to:
11761 - NULL_TREE if the statement doesn't need to be vectorized;
11762 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11763
11764 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11765 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11766 statement does not help to determine the overall number of units. */
11767
11768 opt_result
11769 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11770 tree *stmt_vectype_out,
11771 tree *nunits_vectype_out,
11772 unsigned int group_size)
11773 {
11774 gimple *stmt = stmt_info->stmt;
11775
11776 /* For BB vectorization, we should always have a group size once we've
11777 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11778 are tentative requests during things like early data reference
11779 analysis and pattern recognition. */
11780 if (is_a <bb_vec_info> (vinfo))
11781 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11782 else
11783 group_size = 0;
11784
11785 *stmt_vectype_out = NULL_TREE;
11786 *nunits_vectype_out = NULL_TREE;
11787
11788 if (gimple_get_lhs (stmt) == NULL_TREE
11789 /* MASK_STORE has no lhs, but is ok. */
11790 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11791 {
11792 if (is_a <gcall *> (stmt))
11793 {
11794 /* Ignore calls with no lhs. These must be calls to
11795 #pragma omp simd functions, and the vectorization factor
11796 they really need can't be determined until
11797 vectorizable_simd_clone_call. */
11798 if (dump_enabled_p ())
11799 dump_printf_loc (MSG_NOTE, vect_location,
11800 "defer to SIMD clone analysis.\n");
11801 return opt_result::success ();
11802 }
11803
11804 return opt_result::failure_at (stmt,
11805 "not vectorized: irregular stmt.%G", stmt);
11806 }
11807
11808 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11809 return opt_result::failure_at (stmt,
11810 "not vectorized: vector stmt in loop:%G",
11811 stmt);
11812
11813 tree vectype;
11814 tree scalar_type = NULL_TREE;
11815 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11816 {
11817 vectype = STMT_VINFO_VECTYPE (stmt_info);
11818 if (dump_enabled_p ())
11819 dump_printf_loc (MSG_NOTE, vect_location,
11820 "precomputed vectype: %T\n", vectype);
11821 }
11822 else if (vect_use_mask_type_p (stmt_info))
11823 {
11824 unsigned int precision = stmt_info->mask_precision;
11825 scalar_type = build_nonstandard_integer_type (precision, 1);
11826 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11827 if (!vectype)
11828 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11829 " data-type %T\n", scalar_type);
11830 if (dump_enabled_p ())
11831 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11832 }
11833 else
11834 {
11835 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11836 scalar_type = TREE_TYPE (DR_REF (dr));
11837 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11838 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11839 else
11840 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11841
11842 if (dump_enabled_p ())
11843 {
11844 if (group_size)
11845 dump_printf_loc (MSG_NOTE, vect_location,
11846 "get vectype for scalar type (group size %d):"
11847 " %T\n", group_size, scalar_type);
11848 else
11849 dump_printf_loc (MSG_NOTE, vect_location,
11850 "get vectype for scalar type: %T\n", scalar_type);
11851 }
11852 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11853 if (!vectype)
11854 return opt_result::failure_at (stmt,
11855 "not vectorized:"
11856 " unsupported data-type %T\n",
11857 scalar_type);
11858
11859 if (dump_enabled_p ())
11860 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11861 }
11862 *stmt_vectype_out = vectype;
11863
11864 /* Don't try to compute scalar types if the stmt produces a boolean
11865 vector; use the existing vector type instead. */
11866 tree nunits_vectype = vectype;
11867 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11868 {
11869 /* The number of units is set according to the smallest scalar
11870 type (or the largest vector size, but we only support one
11871 vector size per vectorization). */
11872 HOST_WIDE_INT dummy;
11873 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
11874 if (scalar_type != TREE_TYPE (vectype))
11875 {
11876 if (dump_enabled_p ())
11877 dump_printf_loc (MSG_NOTE, vect_location,
11878 "get vectype for smallest scalar type: %T\n",
11879 scalar_type);
11880 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11881 group_size);
11882 if (!nunits_vectype)
11883 return opt_result::failure_at
11884 (stmt, "not vectorized: unsupported data-type %T\n",
11885 scalar_type);
11886 if (dump_enabled_p ())
11887 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
11888 nunits_vectype);
11889 }
11890 }
11891
11892 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
11893 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
11894
11895 if (dump_enabled_p ())
11896 {
11897 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
11898 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
11899 dump_printf (MSG_NOTE, "\n");
11900 }
11901
11902 *nunits_vectype_out = nunits_vectype;
11903 return opt_result::success ();
11904 }