1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
42 #include "dbgcnt.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47
48
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
52
53 static void
54 vect_free_slp_tree (slp_tree node, bool final_p)
55 {
56 int i;
57 slp_tree child;
58
59 if (--node->refcnt != 0)
60 return;
61
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
63 vect_free_slp_tree (child, final_p);
64
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
69 if (!final_p)
70 {
71 stmt_vec_info stmt_info;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
73 {
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
76 }
77 }
78
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
81 SLP_TREE_SCALAR_OPS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
83 SLP_TREE_LOAD_PERMUTATION (node).release ();
84
85 free (node);
86 }
87
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
91
92 void
93 vect_free_slp_instance (slp_instance instance, bool final_p)
94 {
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
96 SLP_INSTANCE_LOADS (instance).release ();
97 free (instance);
98 }
99
100
101 /* Create an SLP node for SCALAR_STMTS. */
102
103 static slp_tree
104 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
105 {
106 slp_tree node;
107 stmt_vec_info stmt_info = scalar_stmts[0];
108 unsigned int nops;
109
110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
118 else if (is_a <gphi *> (stmt_info->stmt))
119 nops = 0;
120 else
121 return NULL;
122
123 node = XNEW (struct _slp_tree);
124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
125 SLP_TREE_SCALAR_OPS (node) = vNULL;
126 SLP_TREE_VEC_STMTS (node).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
128 SLP_TREE_CHILDREN (node).create (nops);
129 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
130 SLP_TREE_TWO_OPERATORS (node) = false;
131 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
132 node->refcnt = 1;
133 node->max_nunits = 1;
134
135 unsigned i;
136 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
137 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
138
139 return node;
140 }
141
142 /* Create an SLP node for OPS. */
143
144 static slp_tree
145 vect_create_new_slp_node (vec<tree> ops)
146 {
147 slp_tree node;
148
149 node = XNEW (struct _slp_tree);
150 SLP_TREE_SCALAR_STMTS (node) = vNULL;
151 SLP_TREE_SCALAR_OPS (node) = ops;
152 SLP_TREE_VEC_STMTS (node).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
154 SLP_TREE_CHILDREN (node) = vNULL;
155 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
156 SLP_TREE_TWO_OPERATORS (node) = false;
157 SLP_TREE_DEF_TYPE (node) = vect_external_def;
158 node->refcnt = 1;
159 node->max_nunits = 1;
160
161 return node;
162 }
163
164
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
167 node. */
168 typedef struct _slp_oprnd_info
169 {
170 /* Def-stmts for the operands. */
171 vec<stmt_vec_info> def_stmts;
172 /* Operands. */
173 vec<tree> ops;
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
176 stmt. */
177 tree first_op_type;
178 enum vect_def_type first_dt;
179 bool any_pattern;
180 } *slp_oprnd_info;
181
182
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
184 operand. */
185 static vec<slp_oprnd_info>
186 vect_create_oprnd_info (int nops, int group_size)
187 {
188 int i;
189 slp_oprnd_info oprnd_info;
190 vec<slp_oprnd_info> oprnds_info;
191
192 oprnds_info.create (nops);
193 for (i = 0; i < nops; i++)
194 {
195 oprnd_info = XNEW (struct _slp_oprnd_info);
196 oprnd_info->def_stmts.create (group_size);
197 oprnd_info->ops.create (group_size);
198 oprnd_info->first_dt = vect_uninitialized_def;
199 oprnd_info->first_op_type = NULL_TREE;
200 oprnd_info->any_pattern = false;
201 oprnds_info.quick_push (oprnd_info);
202 }
203
204 return oprnds_info;
205 }
206
207
208 /* Free operands info. */
209
210 static void
211 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
212 {
213 int i;
214 slp_oprnd_info oprnd_info;
215
216 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
217 {
218 oprnd_info->def_stmts.release ();
219 oprnd_info->ops.release ();
220 XDELETE (oprnd_info);
221 }
222
223 oprnds_info.release ();
224 }
225
226
227 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
228 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
229 of the chain. */
230
231 int
232 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
233 stmt_vec_info first_stmt_info)
234 {
235 stmt_vec_info next_stmt_info = first_stmt_info;
236 int result = 0;
237
238 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
239 return -1;
240
241 do
242 {
243 if (next_stmt_info == stmt_info)
244 return result;
245 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
246 if (next_stmt_info)
247 result += DR_GROUP_GAP (next_stmt_info);
248 }
249 while (next_stmt_info);
250
251 return -1;
252 }
253
254 /* Check whether it is possible to load COUNT elements of type ELT_MODE
255 using the method implemented by duplicate_and_interleave. Return true
256 if so, returning the number of intermediate vectors in *NVECTORS_OUT
257 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
258 (if nonnull). */
259
260 bool
261 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
262 machine_mode elt_mode,
263 unsigned int *nvectors_out,
264 tree *vector_type_out,
265 tree *permutes)
266 {
267 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
268 poly_int64 nelts;
269 unsigned int nvectors = 1;
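/* Start with a single integer element that packs all COUNT scalars
   (ELT_BYTES bytes); each iteration halves the element size and
   doubles the number of intermediate vectors until a vector mode
   that supports the required permutes is found.  */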
270 for (;;)
271 {
272 scalar_int_mode int_mode;
273 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
274 if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
275 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
276 {
277 tree int_type = build_nonstandard_integer_type
278 (GET_MODE_BITSIZE (int_mode), 1);
279 tree vector_type = build_vector_type (int_type, nelts);
280 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
281 {
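/* Check that the target supports the two interleaving permutes
   { 0, nelts, 1, nelts + 1, ... } (low halves of the two inputs)
   and { nelts/2, nelts/2 + nelts, ... } (high halves); three
   elements per pattern suffice to encode the stepped series.  */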
282 vec_perm_builder sel1 (nelts, 2, 3);
283 vec_perm_builder sel2 (nelts, 2, 3);
284 poly_int64 half_nelts = exact_div (nelts, 2);
285 for (unsigned int i = 0; i < 3; ++i)
286 {
287 sel1.quick_push (i);
288 sel1.quick_push (i + nelts);
289 sel2.quick_push (half_nelts + i);
290 sel2.quick_push (half_nelts + i + nelts);
291 }
292 vec_perm_indices indices1 (sel1, 2, nelts);
293 vec_perm_indices indices2 (sel2, 2, nelts);
294 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
295 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
296 {
297 if (nvectors_out)
298 *nvectors_out = nvectors;
299 if (vector_type_out)
300 *vector_type_out = vector_type;
301 if (permutes)
302 {
303 permutes[0] = vect_gen_perm_mask_checked (vector_type,
304 indices1);
305 permutes[1] = vect_gen_perm_mask_checked (vector_type,
306 indices2);
307 }
308 return true;
309 }
310 }
311 }
312 if (!multiple_p (elt_bytes, 2, &elt_bytes))
313 return false;
314 nvectors *= 2;
315 }
316 }
317
318 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
319 they are of a valid type and that they match the defs of the first stmt of
320 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
321 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
322 indicates swap is required for cond_expr stmts. Specifically, *SWAP
323 is 1 if STMT is cond and operands of comparison need to be swapped;
324 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
325 If there is any operand swap in this function, *SWAP is set to non-zero
326 value.
327 If there was a fatal error return -1; if the error could be corrected by
328 swapping operands of the parent node of this one, return 1; if everything
329 is ok return 0. */
330 static int
331 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
332 vec<stmt_vec_info> stmts, unsigned stmt_num,
333 vec<slp_oprnd_info> *oprnds_info)
334 {
335 stmt_vec_info stmt_info = stmts[stmt_num];
336 tree oprnd;
337 unsigned int i, number_of_oprnds;
338 enum vect_def_type dt = vect_uninitialized_def;
339 slp_oprnd_info oprnd_info;
340 int first_op_idx = 1;
341 unsigned int commutative_op = -1U;
342 bool first_op_cond = false;
343 bool first = stmt_num == 0;
344
345 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
346 {
347 number_of_oprnds = gimple_call_num_args (stmt);
348 first_op_idx = 3;
349 if (gimple_call_internal_p (stmt))
350 {
351 internal_fn ifn = gimple_call_internal_fn (stmt);
352 commutative_op = first_commutative_argument (ifn);
353
354 /* Masked load, only look at mask. */
355 if (ifn == IFN_MASK_LOAD)
356 {
357 number_of_oprnds = 1;
358 /* Mask operand index. */
359 first_op_idx = 5;
360 }
361 }
362 }
363 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
364 {
365 enum tree_code code = gimple_assign_rhs_code (stmt);
366 number_of_oprnds = gimple_num_ops (stmt) - 1;
367 /* Swap can only be done for cond_expr if asked to, otherwise we
368 could result in different comparison code to the first stmt. */
369 if (code == COND_EXPR
370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
371 {
372 first_op_cond = true;
373 number_of_oprnds++;
374 }
375 else
376 commutative_op = commutative_tree_code (code) ? 0U : -1U;
377 }
378 else
379 return -1;
380
381 bool swapped = (*swap != 0);
382 gcc_assert (!swapped || first_op_cond);
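/* A nonzero incoming *SWAP can only have been set for a COND_EXPR;
   vect_build_slp_tree_1 records swaps only for comparisons.  */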
383 for (i = 0; i < number_of_oprnds; i++)
384 {
385 again:
386 if (first_op_cond)
387 {
388 /* Map indicating how operands of cond_expr should be swapped. */
389 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
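/* maps[0] is the identity; maps[1] (*swap == 1) swaps the two
   comparison operands; maps[2] (*swap == 2) swaps the true/false
   arms to match the inverted comparison code.  */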
390 int *map = maps[*swap];
391
392 if (i < 2)
393 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
394 first_op_idx), map[i]);
395 else
396 oprnd = gimple_op (stmt_info->stmt, map[i]);
397 }
398 else
399 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
400 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
401 oprnd = TREE_OPERAND (oprnd, 0);
402
403 oprnd_info = (*oprnds_info)[i];
404
405 stmt_vec_info def_stmt_info;
406 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
407 {
408 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
410 "Build SLP failed: can't analyze def for %T\n",
411 oprnd);
412
413 return -1;
414 }
415
416 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
417 oprnd_info->any_pattern = true;
418
419 if (first)
420 {
421 /* For the swapping logic below force vect_reduction_def
422 for the reduction op in a SLP reduction group. */
423 if (!STMT_VINFO_DATA_REF (stmt_info)
424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
425 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
426 && def_stmt_info)
427 dt = vect_reduction_def;
428 oprnd_info->first_dt = dt;
429 oprnd_info->first_op_type = TREE_TYPE (oprnd);
430 }
431 else
432 {
433 /* Not first stmt of the group, check that the def-stmt/s match
434 the def-stmt/s of the first stmt. Allow different definition
435 types for reduction chains: the first stmt must be a
436 vect_reduction_def (a phi node), and the rest
437 end in the reduction chain. */
438 tree type = TREE_TYPE (oprnd);
439 if ((oprnd_info->first_dt != dt
440 && !(oprnd_info->first_dt == vect_reduction_def
441 && !STMT_VINFO_DATA_REF (stmt_info)
442 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
443 && def_stmt_info
444 && !STMT_VINFO_DATA_REF (def_stmt_info)
445 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
446 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
447 && !((oprnd_info->first_dt == vect_external_def
448 || oprnd_info->first_dt == vect_constant_def)
449 && (dt == vect_external_def
450 || dt == vect_constant_def)))
451 || !types_compatible_p (oprnd_info->first_op_type, type)
452 || (!STMT_VINFO_DATA_REF (stmt_info)
453 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
454 && ((!def_stmt_info
455 || STMT_VINFO_DATA_REF (def_stmt_info)
456 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
457 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
458 != (oprnd_info->first_dt != vect_reduction_def))))
459 {
460 /* Try swapping operands if we got a mismatch. */
461 if (i == commutative_op && !swapped)
462 {
463 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE, vect_location,
465 "trying swapped operands\n");
466 swapped = true;
467 goto again;
468 }
469
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472 "Build SLP failed: different types\n");
473
474 return 1;
475 }
476 if ((dt == vect_constant_def
477 || dt == vect_external_def)
478 && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
479 && (TREE_CODE (type) == BOOLEAN_TYPE
480 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
481 TYPE_MODE (type))))
482 {
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
485 "Build SLP failed: invalid type of def "
486 "for variable-length SLP %T\n", oprnd);
487 return -1;
488 }
489 }
490
491 /* Check the types of the definitions. */
492 switch (dt)
493 {
494 case vect_external_def:
495 /* Make sure to demote the overall operand to external. */
496 oprnd_info->first_dt = vect_external_def;
497 /* Fallthru. */
498 case vect_constant_def:
499 oprnd_info->def_stmts.quick_push (NULL);
500 oprnd_info->ops.quick_push (oprnd);
501 break;
502
503 case vect_internal_def:
504 case vect_reduction_def:
505 if (oprnd_info->first_dt == vect_reduction_def
506 && !STMT_VINFO_DATA_REF (stmt_info)
507 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
508 && !STMT_VINFO_DATA_REF (def_stmt_info)
509 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
510 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
511 {
512 /* For a SLP reduction chain we want to duplicate the
513 reduction to each of the chain members. That gets
514 us a sane SLP graph (still the stmts are not 100%
515 correct wrt the initial values). */
516 gcc_assert (!first);
517 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
518 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
519 break;
520 }
521 /* Fallthru. */
522 case vect_induction_def:
523 oprnd_info->def_stmts.quick_push (def_stmt_info);
524 oprnd_info->ops.quick_push (oprnd);
525 break;
526
527 default:
528 /* FORNOW: Not supported. */
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: illegal type of def %T\n",
532 oprnd);
533
534 return -1;
535 }
536 }
537
538 /* Swap operands. */
539 if (swapped)
540 {
541 if (first_op_cond)
542 {
543 /* If there are already uses of this stmt in a SLP instance then
544 we've committed to the operand order and can't swap it. */
545 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
546 {
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549 "Build SLP failed: cannot swap operands of "
550 "shared stmt %G", stmt_info->stmt);
551 return -1;
552 }
553
554 /* To get rid of this swapping we have to move the stmt code
555 to the SLP tree as well (and gather it here per stmt). */
556 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
557 tree cond = gimple_assign_rhs1 (stmt);
558 enum tree_code code = TREE_CODE (cond);
559
560 /* Swap. */
561 if (*swap == 1)
562 {
563 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
564 &TREE_OPERAND (cond, 1));
565 TREE_SET_CODE (cond, swap_tree_comparison (code));
566 }
567 /* Invert. */
568 else
569 {
570 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
571 gimple_assign_rhs3_ptr (stmt));
572 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
573 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
574 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
575 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
576 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
577 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
578 gcc_assert (code != ERROR_MARK);
579 TREE_SET_CODE (cond, code);
580 }
581 }
582 else
583 {
584 /* Commutative ops need not reflect swapping, ops are in
585 the SLP tree. */
586 }
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE, vect_location,
589 "swapped operands to match def types in %G",
590 stmt_info->stmt);
591 }
592
593 *swap = swapped;
594 return 0;
595 }
596
597 /* Return true if call statements CALL1 and CALL2 are similar enough
598 to be combined into the same SLP group. */
599
600 static bool
601 compatible_calls_p (gcall *call1, gcall *call2)
602 {
603 unsigned int nargs = gimple_call_num_args (call1);
604 if (nargs != gimple_call_num_args (call2))
605 return false;
606
607 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
608 return false;
609
610 if (gimple_call_internal_p (call1))
611 {
612 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
613 TREE_TYPE (gimple_call_lhs (call2))))
614 return false;
615 for (unsigned int i = 0; i < nargs; ++i)
616 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
617 TREE_TYPE (gimple_call_arg (call2, i))))
618 return false;
619 }
620 else
621 {
622 if (!operand_equal_p (gimple_call_fn (call1),
623 gimple_call_fn (call2), 0))
624 return false;
625
626 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
627 return false;
628 }
629 return true;
630 }
631
632 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
633 caller's attempt to find the vector type in STMT_INFO with the narrowest
634 element type. Return true if VECTYPE is nonnull and if it is valid
635 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
636 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
637 vect_build_slp_tree. */
638
639 static bool
640 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
641 tree vectype, poly_uint64 *max_nunits)
642 {
643 if (!vectype)
644 {
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
647 "Build SLP failed: unsupported data-type in %G\n",
648 stmt_info->stmt);
649 /* Fatal mismatch. */
650 return false;
651 }
652
653 /* If populating the vector type requires unrolling then fail
654 before adjusting *max_nunits for basic-block vectorization. */
655 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
656 unsigned HOST_WIDE_INT const_nunits;
657 if (STMT_VINFO_BB_VINFO (stmt_info)
658 && (!nunits.is_constant (&const_nunits)
659 || const_nunits > group_size))
660 {
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
663 "Build SLP failed: unrolling required "
664 "in basic block SLP\n");
665 /* Fatal mismatch. */
666 return false;
667 }
668
669 /* In case of multiple types we need to detect the smallest type. */
670 vect_update_max_nunits (max_nunits, vectype);
671 return true;
672 }
673
674 /* STMTS is a group of GROUP_SIZE SLP statements in which some
675 statements do the same operation as the first statement and in which
676 the others do ALT_STMT_CODE. Return true if we can take one vector
677 of the first operation and one vector of the second and permute them
678 to get the required result. VECTYPE is the type of the vector that
679 would be permuted. */
680
681 static bool
682 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
683 unsigned int group_size, tree vectype,
684 tree_code alt_stmt_code)
685 {
686 unsigned HOST_WIDE_INT count;
687 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
688 return false;
689
690 vec_perm_builder sel (count, count, 1);
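/* Lane I of the permutation picks lane I of the first vector when
   stmt I % GROUP_SIZE uses the first operation and lane I of the
   second vector (offset by COUNT) when it uses ALT_STMT_CODE.  */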
691 for (unsigned int i = 0; i < count; ++i)
692 {
693 unsigned int elt = i;
694 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
695 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
696 elt += count;
697 sel.quick_push (elt);
698 }
699 vec_perm_indices indices (sel, 2, count);
700 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
701 }
702
703 /* Verify if the scalar stmts STMTS are isomorphic, require data
704 permutation or are of unsupported types of operation. Return
705 true if they are, otherwise return false and indicate in *MATCHES
706 which stmts are not isomorphic to the first one. If MATCHES[0]
707 is false then this indicates the comparison could not be
708 carried out or the stmts will never be vectorized by SLP.
709
710 Note COND_EXPR is possibly isomorphic to another one after swapping its
711 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
712 the first stmt by swapping the two operands of comparison; set SWAP[i]
713 to 2 if stmt I is isormorphic to the first stmt by inverting the code
714 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
715 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
716
717 static bool
718 vect_build_slp_tree_1 (unsigned char *swap,
719 vec<stmt_vec_info> stmts, unsigned int group_size,
720 poly_uint64 *max_nunits, bool *matches,
721 bool *two_operators)
722 {
723 unsigned int i;
724 stmt_vec_info first_stmt_info = stmts[0];
725 enum tree_code first_stmt_code = ERROR_MARK;
726 enum tree_code alt_stmt_code = ERROR_MARK;
727 enum tree_code rhs_code = ERROR_MARK;
728 enum tree_code first_cond_code = ERROR_MARK;
729 tree lhs;
730 bool need_same_oprnds = false;
731 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
732 optab optab;
733 int icode;
734 machine_mode optab_op2_mode;
735 machine_mode vec_mode;
736 stmt_vec_info first_load = NULL, prev_first_load = NULL;
737 bool load_p = false;
738
739 /* For every stmt in NODE find its def stmt/s. */
740 stmt_vec_info stmt_info;
741 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
742 {
743 gimple *stmt = stmt_info->stmt;
744 swap[i] = 0;
745 matches[i] = false;
746
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
749
750 /* Fail to vectorize statements marked as unvectorizable. */
751 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
752 {
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "Build SLP failed: unvectorizable statement %G",
756 stmt);
757 /* Fatal mismatch. */
758 matches[0] = false;
759 return false;
760 }
761
762 lhs = gimple_get_lhs (stmt);
763 if (lhs == NULL_TREE)
764 {
765 if (dump_enabled_p ())
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
767 "Build SLP failed: not GIMPLE_ASSIGN nor "
768 "GIMPLE_CALL %G", stmt);
769 /* Fatal mismatch. */
770 matches[0] = false;
771 return false;
772 }
773
774 tree nunits_vectype;
775 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
776 &nunits_vectype)
777 || (nunits_vectype
778 && !vect_record_max_nunits (stmt_info, group_size,
779 nunits_vectype, max_nunits)))
780 {
781 /* Fatal mismatch. */
782 matches[0] = false;
783 return false;
784 }
785
786 gcc_assert (vectype);
787
788 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
789 {
790 rhs_code = CALL_EXPR;
791
792 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
793 load_p = true;
794 else if ((gimple_call_internal_p (call_stmt)
795 && (!vectorizable_internal_fn_p
796 (gimple_call_internal_fn (call_stmt))))
797 || gimple_call_tail_p (call_stmt)
798 || gimple_call_noreturn_p (call_stmt)
799 || !gimple_call_nothrow_p (call_stmt)
800 || gimple_call_chain (call_stmt))
801 {
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
804 "Build SLP failed: unsupported call type %G",
805 call_stmt);
806 /* Fatal mismatch. */
807 matches[0] = false;
808 return false;
809 }
810 }
811 else
812 {
813 rhs_code = gimple_assign_rhs_code (stmt);
814 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
815 }
816
817 /* Check the operation. */
818 if (i == 0)
819 {
820 first_stmt_code = rhs_code;
821
822 /* Shift arguments should be equal in all the packed stmts for a
823 vector shift with scalar shift operand. */
824 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
825 || rhs_code == LROTATE_EXPR
826 || rhs_code == RROTATE_EXPR)
827 {
828 if (vectype == boolean_type_node)
829 {
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
832 "Build SLP failed: shift of a"
833 " boolean.\n");
834 /* Fatal mismatch. */
835 matches[0] = false;
836 return false;
837 }
838
839 vec_mode = TYPE_MODE (vectype);
840
841 /* First see if we have a vector/vector shift. */
842 optab = optab_for_tree_code (rhs_code, vectype,
843 optab_vector);
844
845 if (!optab
846 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
847 {
848 /* No vector/vector shift, try for a vector/scalar shift. */
849 optab = optab_for_tree_code (rhs_code, vectype,
850 optab_scalar);
851
852 if (!optab)
853 {
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
856 "Build SLP failed: no optab.\n");
857 /* Fatal mismatch. */
858 matches[0] = false;
859 return false;
860 }
861 icode = (int) optab_handler (optab, vec_mode);
862 if (icode == CODE_FOR_nothing)
863 {
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
866 "Build SLP failed: "
867 "op not supported by target.\n");
868 /* Fatal mismatch. */
869 matches[0] = false;
870 return false;
871 }
872 optab_op2_mode = insn_data[icode].operand[2].mode;
873 if (!VECTOR_MODE_P (optab_op2_mode))
874 {
875 need_same_oprnds = true;
876 first_op1 = gimple_assign_rhs2 (stmt);
877 }
878 }
879 }
880 else if (rhs_code == WIDEN_LSHIFT_EXPR)
881 {
882 need_same_oprnds = true;
883 first_op1 = gimple_assign_rhs2 (stmt);
884 }
885 }
886 else
887 {
888 if (first_stmt_code != rhs_code
889 && alt_stmt_code == ERROR_MARK)
890 alt_stmt_code = rhs_code;
891 if (first_stmt_code != rhs_code
892 && (first_stmt_code != IMAGPART_EXPR
893 || rhs_code != REALPART_EXPR)
894 && (first_stmt_code != REALPART_EXPR
895 || rhs_code != IMAGPART_EXPR)
896 /* Handle mismatches in plus/minus by computing both
897 and merging the results. */
898 && !((first_stmt_code == PLUS_EXPR
899 || first_stmt_code == MINUS_EXPR)
900 && (alt_stmt_code == PLUS_EXPR
901 || alt_stmt_code == MINUS_EXPR)
902 && rhs_code == alt_stmt_code)
903 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
904 && (first_stmt_code == ARRAY_REF
905 || first_stmt_code == BIT_FIELD_REF
906 || first_stmt_code == INDIRECT_REF
907 || first_stmt_code == COMPONENT_REF
908 || first_stmt_code == MEM_REF)))
909 {
910 if (dump_enabled_p ())
911 {
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
913 "Build SLP failed: different operation "
914 "in stmt %G", stmt);
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
916 "original stmt %G", first_stmt_info->stmt);
917 }
918 /* Mismatch. */
919 continue;
920 }
921
922 if (need_same_oprnds
923 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
924 {
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
927 "Build SLP failed: different shift "
928 "arguments in %G", stmt);
929 /* Mismatch. */
930 continue;
931 }
932
933 if (!load_p && rhs_code == CALL_EXPR)
934 {
935 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
936 as_a <gcall *> (stmt)))
937 {
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
940 "Build SLP failed: different calls in %G",
941 stmt);
942 /* Mismatch. */
943 continue;
944 }
945 }
946 }
947
948 /* Grouped store or load. */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
950 {
951 if (REFERENCE_CLASS_P (lhs))
952 {
953 /* Store. */
954 ;
955 }
956 else
957 {
958 /* Load. */
959 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
960 if (prev_first_load)
961 {
962 /* Check that there are no loads from different interleaving
963 chains in the same node. */
964 if (prev_first_load != first_load)
965 {
966 if (dump_enabled_p ())
967 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
968 vect_location,
969 "Build SLP failed: different "
970 "interleaving chains in one node %G",
971 stmt);
972 /* Mismatch. */
973 continue;
974 }
975 }
976 else
977 prev_first_load = first_load;
978 }
979 } /* Grouped access. */
980 else
981 {
982 if (load_p)
983 {
984 /* Not grouped load. */
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
987 "Build SLP failed: not grouped load %G", stmt);
988
989 /* FORNOW: Not grouped loads are not supported. */
990 /* Fatal mismatch. */
991 matches[0] = false;
992 return false;
993 }
994
995 /* Not memory operation. */
996 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
997 && TREE_CODE_CLASS (rhs_code) != tcc_unary
998 && TREE_CODE_CLASS (rhs_code) != tcc_expression
999 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1000 && rhs_code != VIEW_CONVERT_EXPR
1001 && rhs_code != CALL_EXPR)
1002 {
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1005 "Build SLP failed: operation unsupported %G",
1006 stmt);
1007 /* Fatal mismatch. */
1008 matches[0] = false;
1009 return false;
1010 }
1011
1012 if (rhs_code == COND_EXPR)
1013 {
1014 tree cond_expr = gimple_assign_rhs1 (stmt);
1015 enum tree_code cond_code = TREE_CODE (cond_expr);
1016 enum tree_code swap_code = ERROR_MARK;
1017 enum tree_code invert_code = ERROR_MARK;
1018
1019 if (i == 0)
1020 first_cond_code = TREE_CODE (cond_expr);
1021 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1022 {
1023 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1024 swap_code = swap_tree_comparison (cond_code);
1025 invert_code = invert_tree_comparison (cond_code, honor_nans);
1026 }
1027
1028 if (first_cond_code == cond_code)
1029 ;
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code == swap_code)
1032 swap[i] = 1;
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code == invert_code)
1035 swap[i] = 2;
1036 else
1037 {
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1040 "Build SLP failed: different"
1041 " operation %G", stmt);
1042 /* Mismatch. */
1043 continue;
1044 }
1045 }
1046 }
1047
1048 matches[i] = true;
1049 }
1050
1051 for (i = 0; i < group_size; ++i)
1052 if (!matches[i])
1053 return false;
1054
1055 /* If we allowed a two-operation SLP node verify the target can cope
1056 with the permute we are going to use. */
1057 if (alt_stmt_code != ERROR_MARK
1058 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1059 {
1060 if (vectype == boolean_type_node
1061 || !vect_two_operations_perm_ok_p (stmts, group_size,
1062 vectype, alt_stmt_code))
1063 {
1064 for (i = 0; i < group_size; ++i)
1065 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1066 {
1067 matches[i] = false;
1068 if (dump_enabled_p ())
1069 {
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1071 "Build SLP failed: different operation "
1072 "in stmt %G", stmts[i]->stmt);
1073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1074 "original stmt %G", first_stmt_info->stmt);
1075 }
1076 }
1077 return false;
1078 }
1079 *two_operators = true;
1080 }
1081
1082 return true;
1083 }
1084
1085 /* Traits for the hash_map keyed on sets of scalar stmts, used to cache SLP builds.
1086 Note we never remove apart from at destruction time so we do not
1087 need a special value for deleted that differs from empty. */
1088 struct bst_traits
1089 {
1090 typedef vec <stmt_vec_info> value_type;
1091 typedef vec <stmt_vec_info> compare_type;
1092 static inline hashval_t hash (value_type);
1093 static inline bool equal (value_type existing, value_type candidate);
1094 static inline bool is_empty (value_type x) { return !x.exists (); }
1095 static inline bool is_deleted (value_type x) { return !x.exists (); }
1096 static inline void mark_empty (value_type &x) { x.release (); }
1097 static inline void mark_deleted (value_type &x) { x.release (); }
1098 static inline void remove (value_type &x) { x.release (); }
1099 };
1100 inline hashval_t
1101 bst_traits::hash (value_type x)
1102 {
1103 inchash::hash h;
1104 for (unsigned i = 0; i < x.length (); ++i)
1105 h.add_int (gimple_uid (x[i]->stmt));
1106 return h.end ();
1107 }
1108 inline bool
1109 bst_traits::equal (value_type existing, value_type candidate)
1110 {
1111 if (existing.length () != candidate.length ())
1112 return false;
1113 for (unsigned i = 0; i < existing.length (); ++i)
1114 if (existing[i] != candidate[i])
1115 return false;
1116 return true;
1117 }
1118
1119 typedef hash_map <vec <gimple *>, slp_tree,
1120 simple_hashmap_traits <bst_traits, slp_tree> >
1121 scalar_stmts_to_slp_tree_map_t;
1122
1123 static slp_tree
1124 vect_build_slp_tree_2 (vec_info *vinfo,
1125 vec<stmt_vec_info> stmts, unsigned int group_size,
1126 poly_uint64 *max_nunits,
1127 bool *matches, unsigned *npermutes, unsigned *tree_size,
1128 scalar_stmts_to_slp_tree_map_t *bst_map);
1129
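/* Build an SLP tree for the group of stmts STMTS, re-using a
   previously built tree for the same stmt set when BST_MAP has one
   recorded.  Failed builds are cached as well (as a NULL entry) so
   they are not re-tried.  */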
1130 static slp_tree
1131 vect_build_slp_tree (vec_info *vinfo,
1132 vec<stmt_vec_info> stmts, unsigned int group_size,
1133 poly_uint64 *max_nunits,
1134 bool *matches, unsigned *npermutes, unsigned *tree_size,
1135 scalar_stmts_to_slp_tree_map_t *bst_map)
1136 {
1137 if (slp_tree *leader = bst_map->get (stmts))
1138 {
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1141 *leader ? "" : "failed ", *leader);
1142 if (*leader)
1143 {
1144 (*leader)->refcnt++;
1145 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1146 }
1147 return *leader;
1148 }
1149 poly_uint64 this_max_nunits = 1;
1150 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
1151 &this_max_nunits, matches, npermutes, tree_size, bst_map);
1152 if (res)
1153 {
1154 res->max_nunits = this_max_nunits;
1155 vect_update_max_nunits (max_nunits, this_max_nunits);
1156 /* Keep a reference for the bst_map use. */
1157 res->refcnt++;
1158 }
1159 bst_map->put (stmts.copy (), res);
1160 return res;
1161 }
1162
1163 /* Recursively build an SLP tree for the group of stmts STMTS.
1164 Fail (and return NULL) if the def-stmts are not isomorphic, require
1165 data permutation or are of unsupported types of operation.
1166 Otherwise return the new SLP tree.
1167 MATCHES indicates which stmts of the group failed to match the
1168 first one. */
1169
1170 static slp_tree
1171 vect_build_slp_tree_2 (vec_info *vinfo,
1172 vec<stmt_vec_info> stmts, unsigned int group_size,
1173 poly_uint64 *max_nunits,
1174 bool *matches, unsigned *npermutes, unsigned *tree_size,
1175 scalar_stmts_to_slp_tree_map_t *bst_map)
1176 {
1177 unsigned nops, i, this_tree_size = 0;
1178 poly_uint64 this_max_nunits = *max_nunits;
1179 slp_tree node;
1180
1181 matches[0] = false;
1182
1183 stmt_vec_info stmt_info = stmts[0];
1184 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1185 nops = gimple_call_num_args (stmt);
1186 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1187 {
1188 nops = gimple_num_ops (stmt) - 1;
1189 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1190 nops++;
1191 }
1192 else if (is_a <gphi *> (stmt_info->stmt))
1193 nops = 0;
1194 else
1195 return NULL;
1196
1197 /* If the SLP node is a PHI (induction or reduction), terminate
1198 the recursion. */
1199 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1200 {
1201 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1202 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
1203 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1204 return NULL;
1205
1206 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1207 /* Induction from different IVs is not supported. */
1208 if (def_type == vect_induction_def)
1209 {
1210 stmt_vec_info other_info;
1211 FOR_EACH_VEC_ELT (stmts, i, other_info)
1212 if (stmt_info != other_info)
1213 return NULL;
1214 }
1215 else if (def_type == vect_reduction_def
1216 || def_type == vect_double_reduction_def
1217 || def_type == vect_nested_cycle)
1218 {
1219 /* Else def types have to match. */
1220 stmt_vec_info other_info;
1221 FOR_EACH_VEC_ELT (stmts, i, other_info)
1222 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1223 return NULL;
1224 }
1225 else
1226 return NULL;
1227 (*tree_size)++;
1228 node = vect_create_new_slp_node (stmts);
1229 return node;
1230 }
1231
1232
1233 bool two_operators = false;
1234 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1235 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1236 &this_max_nunits, matches, &two_operators))
1237 return NULL;
1238
1239 /* If the SLP node is a load, terminate the recursion unless masked. */
1240 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1241 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1242 {
1243 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1244 {
1245 /* Masked load. */
1246 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1247 nops = 1;
1248 }
1249 else
1250 {
1251 *max_nunits = this_max_nunits;
1252 (*tree_size)++;
1253 node = vect_create_new_slp_node (stmts);
1254 return node;
1255 }
1256 }
1257
1258 /* Get at the operands, verifying they are compatible. */
1259 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1260 slp_oprnd_info oprnd_info;
1261 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1262 {
1263 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1264 stmts, i, &oprnds_info);
1265 if (res != 0)
1266 matches[(res == -1) ? 0 : i] = false;
1267 if (!matches[0])
1268 break;
1269 }
1270 for (i = 0; i < group_size; ++i)
1271 if (!matches[i])
1272 {
1273 vect_free_oprnd_info (oprnds_info);
1274 return NULL;
1275 }
1276
1277 auto_vec<slp_tree, 4> children;
1278
1279 stmt_info = stmts[0];
1280
1281 /* Create SLP_TREE nodes for the definition node/s. */
1282 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1283 {
1284 slp_tree child;
1285 unsigned old_tree_size = this_tree_size;
1286 unsigned int j;
1287
1288 if (oprnd_info->first_dt == vect_uninitialized_def)
1289 {
1290 /* A COND_EXPR has one operand too many here if the condition
1291 is an SSA name. */
1292 gcc_assert (i == 3 && nops == 4);
1293 continue;
1294 }
1295
1296 if (oprnd_info->first_dt != vect_internal_def
1297 && oprnd_info->first_dt != vect_reduction_def
1298 && oprnd_info->first_dt != vect_induction_def)
1299 {
1300 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1301 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1302 oprnd_info->ops = vNULL;
1303 children.safe_push (invnode);
1304 continue;
1305 }
1306
1307 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1308 group_size, &this_max_nunits,
1309 matches, npermutes,
1310 &this_tree_size, bst_map)) != NULL)
1311 {
1312 /* If we have all children of child built up from scalars then just
1313 throw that away and build this node up from scalars instead. */
1314 if (is_a <bb_vec_info> (vinfo)
1315 && !SLP_TREE_CHILDREN (child).is_empty ()
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1318 scalar version. */
1319 && !oprnd_info->any_pattern)
1320 {
1321 slp_tree grandchild;
1322
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1324 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1325 break;
1326 if (!grandchild)
1327 {
1328 /* Roll back. */
1329 this_tree_size = old_tree_size;
1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1331 vect_free_slp_tree (grandchild, false);
1332 SLP_TREE_CHILDREN (child).truncate (0);
1333
1334 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE, vect_location,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
1338 oprnd_info->def_stmts = vNULL;
1339 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1340 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1341 oprnd_info->ops = vNULL;
1342 ++this_tree_size;
1343 children.safe_push (child);
1344 continue;
1345 }
1346 }
1347
1348 oprnd_info->def_stmts = vNULL;
1349 children.safe_push (child);
1350 continue;
1351 }
1352
1353 /* If the SLP build failed fatally and we analyze a basic-block,
1354 simply treat nodes we fail to build as externally defined
1355 (and thus build vectors from the scalar defs).
1356 The cost model will reject outright expensive cases.
1357 ??? This doesn't treat cases where permutation ultimately
1358 fails (or we don't try permutation below). Ideally we'd
1359 even compute a permutation that will end up with the maximum
1360 SLP tree size... */
1361 if (is_a <bb_vec_info> (vinfo)
1362 && !matches[0]
1363 /* ??? Rejecting patterns this way doesn't work. We'd have to
1364 do extra work to cancel the pattern so the uses see the
1365 scalar version. */
1366 && !is_pattern_stmt_p (stmt_info)
1367 && !oprnd_info->any_pattern)
1368 {
1369 if (dump_enabled_p ())
1370 dump_printf_loc (MSG_NOTE, vect_location,
1371 "Building vector operands from scalars\n");
1372 this_tree_size++;
1373 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1374 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1375 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1376 children.safe_push (child);
1377 oprnd_info->ops = vNULL;
1378 oprnd_info->def_stmts = vNULL;
1379 continue;
1380 }
1381
1382 /* If the SLP build for operand zero failed and operand zero
1383 and one can be commuted, try that for the scalar stmts
1384 that failed the match. */
1385 if (i == 0
1386 /* A first scalar stmt mismatch signals a fatal mismatch. */
1387 && matches[0]
1388 /* ??? For COND_EXPRs we can swap the comparison operands
1389 as well as the arms under some constraints. */
1390 && nops == 2
1391 && oprnds_info[1]->first_dt == vect_internal_def
1392 && is_gimple_assign (stmt_info->stmt)
1393 /* Swapping operands for reductions breaks assumptions later on. */
1394 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1395 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
1396 /* Do so only if the number of unsuccessful permutes was no more
1397 than a cut-off, as re-trying the recursive match on
1398 possibly each level of the tree would expose exponential
1399 behavior. */
1400 && *npermutes < 4)
1401 {
1402 /* See whether we can swap the matching or the non-matching
1403 stmt operands. */
1404 bool swap_not_matching = true;
1405 do
1406 {
1407 for (j = 0; j < group_size; ++j)
1408 {
1409 if (matches[j] != !swap_not_matching)
1410 continue;
1411 stmt_vec_info stmt_info = stmts[j];
1412 /* Verify if we can swap operands of this stmt. */
1413 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1414 if (!stmt
1415 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1416 {
1417 if (!swap_not_matching)
1418 goto fail;
1419 swap_not_matching = false;
1420 break;
1421 }
1422 }
1423 }
1424 while (j != group_size);
1425
1426 /* Swap mismatched definition stmts. */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location,
1429 "Re-trying with swapped operands of stmts ");
1430 for (j = 0; j < group_size; ++j)
1431 if (matches[j] == !swap_not_matching)
1432 {
1433 std::swap (oprnds_info[0]->def_stmts[j],
1434 oprnds_info[1]->def_stmts[j]);
1435 std::swap (oprnds_info[0]->ops[j],
1436 oprnds_info[1]->ops[j]);
1437 if (dump_enabled_p ())
1438 dump_printf (MSG_NOTE, "%d ", j);
1439 }
1440 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE, "\n");
1442 /* And try again with scratch 'matches' ... */
1443 bool *tem = XALLOCAVEC (bool, group_size);
1444 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1445 group_size, &this_max_nunits,
1446 tem, npermutes,
1447 &this_tree_size, bst_map)) != NULL)
1448 {
1449 /* If we have all children of child built up from scalars then
1450 just throw that away and build this node up from scalars instead. */
1451 if (is_a <bb_vec_info> (vinfo)
1452 && !SLP_TREE_CHILDREN (child).is_empty ()
1453 /* ??? Rejecting patterns this way doesn't work. We'd have
1454 to do extra work to cancel the pattern so the uses see the
1455 scalar version. */
1456 && !oprnd_info->any_pattern)
1457 {
1458 unsigned int j;
1459 slp_tree grandchild;
1460
1461 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1462 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1463 break;
1464 if (!grandchild)
1465 {
1466 /* Roll back. */
1467 this_tree_size = old_tree_size;
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1469 vect_free_slp_tree (grandchild, false);
1470 SLP_TREE_CHILDREN (child).truncate (0);
1471
1472 if (dump_enabled_p ())
1473 dump_printf_loc (MSG_NOTE, vect_location,
1474 "Building parent vector operands from "
1475 "scalars instead\n");
1476 oprnd_info->def_stmts = vNULL;
1477 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1478 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1479 oprnd_info->ops = vNULL;
1480 ++this_tree_size;
1481 children.safe_push (child);
1482 continue;
1483 }
1484 }
1485
1486 oprnd_info->def_stmts = vNULL;
1487 children.safe_push (child);
1488 continue;
1489 }
1490
1491 ++*npermutes;
1492 }
1493
1494 fail:
1495 gcc_assert (child == NULL);
1496 FOR_EACH_VEC_ELT (children, j, child)
1497 vect_free_slp_tree (child, false);
1498 vect_free_oprnd_info (oprnds_info);
1499 return NULL;
1500 }
1501
1502 vect_free_oprnd_info (oprnds_info);
1503
1504 *tree_size += this_tree_size + 1;
1505 *max_nunits = this_max_nunits;
1506
1507 node = vect_create_new_slp_node (stmts);
1508 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1509 SLP_TREE_CHILDREN (node).splice (children);
1510 return node;
1511 }
1512
1513 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1514
1515 static void
1516 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1517 slp_tree node, hash_set<slp_tree> &visited)
1518 {
1519 unsigned i;
1520 stmt_vec_info stmt_info;
1521 slp_tree child;
1522 tree op;
1523
1524 if (visited.add (node))
1525 return;
1526
1527 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1528 dump_user_location_t user_loc = loc.get_user_location ();
1529 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
1530 SLP_TREE_DEF_TYPE (node) == vect_external_def
1531 ? " (external)"
1532 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1533 ? " (constant)"
1534 : ""), node,
1535 estimated_poly_value (node->max_nunits));
1536 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1538 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1539 else
1540 {
1541 dump_printf_loc (metadata, user_loc, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1543 dump_printf (metadata, "%T%s ", op,
1544 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1545 dump_printf (metadata, "}\n");
1546 }
1547 if (SLP_TREE_CHILDREN (node).is_empty ())
1548 return;
1549 dump_printf_loc (metadata, user_loc, "\tchildren");
1550 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1551 dump_printf (dump_kind, " %p", (void *)child);
1552 dump_printf (dump_kind, "\n");
1553 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1554 vect_print_slp_tree (dump_kind, loc, child, visited);
1555 }
1556
1557 static void
1558 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1559 slp_tree node)
1560 {
1561 hash_set<slp_tree> visited;
1562 vect_print_slp_tree (dump_kind, loc, node, visited);
1563 }
1564
1565 /* Mark the tree rooted at NODE with PURE_SLP. */
1566
1567 static void
1568 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1569 {
1570 int i;
1571 stmt_vec_info stmt_info;
1572 slp_tree child;
1573
1574 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1575 return;
1576
1577 if (visited.add (node))
1578 return;
1579
1580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1581 STMT_SLP_TYPE (stmt_info) = pure_slp;
1582
1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1584 vect_mark_slp_stmts (child, visited);
1585 }
1586
1587 static void
1588 vect_mark_slp_stmts (slp_tree node)
1589 {
1590 hash_set<slp_tree> visited;
1591 vect_mark_slp_stmts (node, visited);
1592 }
1593
1594 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1595
1596 static void
1597 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1598 {
1599 int i;
1600 stmt_vec_info stmt_info;
1601 slp_tree child;
1602
1603 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1604 return;
1605
1606 if (visited.add (node))
1607 return;
1608
1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1610 {
1611 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1612 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1613 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1614 }
1615
1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1617 vect_mark_slp_stmts_relevant (child, visited);
1618 }
1619
1620 static void
1621 vect_mark_slp_stmts_relevant (slp_tree node)
1622 {
1623 hash_set<slp_tree> visited;
1624 vect_mark_slp_stmts_relevant (node, visited);
1625 }
1626
1627
1628 /* Rearrange the statements of NODE according to PERMUTATION. */
1629
1630 static void
1631 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1632 vec<unsigned> permutation,
1633 hash_set<slp_tree> &visited)
1634 {
1635 unsigned int i;
1636 slp_tree child;
1637
1638 if (visited.add (node))
1639 return;
1640
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1642 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1643
1644 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1645 {
1646 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1647 vec<stmt_vec_info> tmp_stmts;
1648 tmp_stmts.create (group_size);
1649 tmp_stmts.quick_grow (group_size);
1650 stmt_vec_info stmt_info;
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1652 tmp_stmts[permutation[i]] = stmt_info;
1653 SLP_TREE_SCALAR_STMTS (node).release ();
1654 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1655 }
1656 if (SLP_TREE_SCALAR_OPS (node).exists ())
1657 {
1658 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1659 vec<tree> tmp_ops;
1660 tmp_ops.create (group_size);
1661 tmp_ops.quick_grow (group_size);
1662 tree op;
1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1664 tmp_ops[permutation[i]] = op;
1665 SLP_TREE_SCALAR_OPS (node).release ();
1666 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1667 }
1668 }
1669
1670
1671 /* Attempt to reorder stmts in a reduction chain so that we don't
1672 require any load permutation. Return true if that was possible,
1673 otherwise return false. */
1674
1675 static bool
1676 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1677 {
1678 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1679 unsigned int i, j;
1680 unsigned int lidx;
1681 slp_tree node, load;
1682
1683 /* Compare all the permutation sequences to the first one. We know
1684 that at least one load is permuted. */
1685 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1686 if (!node->load_permutation.exists ())
1687 return false;
1688 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1689 {
1690 if (!load->load_permutation.exists ())
1691 return false;
1692 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1693 if (lidx != node->load_permutation[j])
1694 return false;
1695 }
1696
1697 /* Check that the loads in the first sequence are different and there
1698 are no gaps between them. */
1699 auto_sbitmap load_index (group_size);
1700 bitmap_clear (load_index);
1701 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1702 {
1703 if (lidx >= group_size)
1704 return false;
1705 if (bitmap_bit_p (load_index, lidx))
1706 return false;
1707
1708 bitmap_set_bit (load_index, lidx);
1709 }
1710 for (i = 0; i < group_size; i++)
1711 if (!bitmap_bit_p (load_index, i))
1712 return false;
1713
1714 /* This permutation is valid for reduction. Since the order of the
1715 statements in the nodes is not important unless they are memory
1716 accesses, we can rearrange the statements in all the nodes
1717 according to the order of the loads. */
1718 hash_set<slp_tree> visited;
1719 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1720 node->load_permutation, visited);
1721
1722 /* We are done, no actual permutations need to be generated. */
1723 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1724 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1725 {
1726 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1727 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1728 /* But we have to keep those permutations that are required because
1729 of handling of gaps. */
1730 if (known_eq (unrolling_factor, 1U)
1731 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1732 && DR_GROUP_GAP (first_stmt_info) == 0))
1733 SLP_TREE_LOAD_PERMUTATION (node).release ();
1734 else
1735 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1736 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1737 }
1738
1739 return true;
1740 }
1741
1742 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1743
1744 static void
1745 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1746 hash_set<slp_tree> &visited)
1747 {
1748 if (visited.add (node))
1749 return;
1750
1751 if (SLP_TREE_CHILDREN (node).length () == 0)
1752 {
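/* A leaf of the SLP graph; record it only if it is a grouped
   load (constant and external leaves are not loads).  */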
1753 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1754 return;
1755 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1756 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1757 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1758 SLP_INSTANCE_LOADS (inst).safe_push (node);
1759 }
1760 else
1761 {
1762 unsigned i;
1763 slp_tree child;
1764 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1765 vect_gather_slp_loads (inst, child, visited);
1766 }
1767 }
1768
1769 static void
1770 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1771 {
1772 hash_set<slp_tree> visited;
1773 vect_gather_slp_loads (inst, node, visited);
1774 }
1775
1776 /* Check if the required load permutations in the SLP instance
1777 SLP_INSTN are supported. */
1778
1779 static bool
1780 vect_supported_load_permutation_p (slp_instance slp_instn)
1781 {
1782 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1783 unsigned int i, j, k, next;
1784 slp_tree node;
1785
1786 if (dump_enabled_p ())
1787 {
1788 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1789 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1790 if (node->load_permutation.exists ())
1791 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1792 dump_printf (MSG_NOTE, "%d ", next);
1793 else
1794 for (k = 0; k < group_size; ++k)
1795 dump_printf (MSG_NOTE, "%d ", k);
1796 dump_printf (MSG_NOTE, "\n");
1797 }
1798
1799 /* In case of reduction every load permutation is allowed, since the order
1800 of the reduction statements is not important (as opposed to the case of
1801 grouped stores). The only condition we need to check is that all the
1802 load nodes are of the same size and have the same permutation (and then
1803 rearrange all the nodes of the SLP instance according to this
1804 permutation). */
1805
1806 /* Check that all the load nodes are of the same size. */
1807 /* ??? Can't we assert this? */
1808 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1809 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1810 return false;
1811
1812 node = SLP_INSTANCE_TREE (slp_instn);
1813 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1814
1815 /* Reduction (there are no data-refs in the root).
1816 In reduction chain the order of the loads is not important. */
1817 if (!STMT_VINFO_DATA_REF (stmt_info)
1818 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1819 vect_attempt_slp_rearrange_stmts (slp_instn);
1820
1821 /* In basic block vectorization we allow any subchain of an interleaving
1822 chain.
1823 FORNOW: not supported in loop SLP because of realignment complications. */
1824 if (STMT_VINFO_BB_VINFO (stmt_info))
1825 {
1826 /* Check whether the loads in an instance form a subchain and thus
1827 no permutation is necessary. */
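/* For example, if the interleaving chain is b[0], b[1], b[2], b[3], a
   node loading { b[1], b[2] } is such a subchain (consecutive elements
   with gap 1 between them) and its load permutation can simply be
   dropped. */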
1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1829 {
1830 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1831 continue;
1832 bool subchain_p = true;
1833 stmt_vec_info next_load_info = NULL;
1834 stmt_vec_info load_info;
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1836 {
1837 if (j != 0
1838 && (next_load_info != load_info
1839 || DR_GROUP_GAP (load_info) != 1))
1840 {
1841 subchain_p = false;
1842 break;
1843 }
1844 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1845 }
1846 if (subchain_p)
1847 SLP_TREE_LOAD_PERMUTATION (node).release ();
1848 else
1849 {
1850 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1851 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1852 unsigned HOST_WIDE_INT nunits;
1853 unsigned k, maxk = 0;
1854 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1855 if (k > maxk)
1856 maxk = k;
1857 /* In BB vectorization we may not actually use a loaded vector
1858 accessing elements in excess of DR_GROUP_SIZE. */
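/* For example, DR_GROUP_SIZE == 6 with nunits == 4 leaves only
   elements 0..3 covered by a full vector load, so a permutation
   index of 4 or 5 makes us give up. */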
1859 tree vectype = STMT_VINFO_VECTYPE (group_info);
1860 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1861 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1862 {
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1865 "BB vectorization with gaps at the end of "
1866 "a load is not supported\n");
1867 return false;
1868 }
1869
1870 /* Verify the permutation can be generated. */
1871 vec<tree> tem;
1872 unsigned n_perms;
1873 if (!vect_transform_slp_perm_load (node, tem, NULL,
1874 1, slp_instn, true, &n_perms))
1875 {
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1878 vect_location,
1879 "unsupported load permutation\n");
1880 return false;
1881 }
1882 }
1883 }
1884 return true;
1885 }
1886
1887 /* For loop vectorization verify we can generate the permutation. Be
1888 conservative about the vectorization factor; there are permutations
1889 that will use three vector inputs only starting from a specific factor
1890 and the vectorization factor is not yet final.
1891 ??? The SLP instance unrolling factor might not be the maximum one. */
1892 unsigned n_perms;
1893 poly_uint64 test_vf
1894 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1895 LOOP_VINFO_VECT_FACTOR
1896 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1897 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1898 if (node->load_permutation.exists ()
1899 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1900 slp_instn, true, &n_perms))
1901 return false;
1902
1903 return true;
1904 }
1905
1906
1907 /* Find the last scalar statement in the SLP group NODE. */
1908
1909 stmt_vec_info
1910 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1911 {
1912 stmt_vec_info last = NULL;
1913 stmt_vec_info stmt_vinfo;
1914
1915 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1916 {
1917 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1918 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1919 }
1920
1921 return last;
1922 }
1923
1924 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1925 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1926 (also containing the first GROUP1_SIZE stmts, since stores are
1927 consecutive), the second containing the remainder.
1928 Return the first stmt in the second group. */
1929
1930 static stmt_vec_info
1931 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1932 {
1933 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1934 gcc_assert (group1_size > 0);
1935 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1936 gcc_assert (group2_size > 0);
1937 DR_GROUP_SIZE (first_vinfo) = group1_size;
1938
1939 stmt_vec_info stmt_info = first_vinfo;
1940 for (unsigned i = group1_size; i > 1; i--)
1941 {
1942 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1943 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1944 }
1945 /* STMT is now the last element of the first group. */
1946 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1947 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1948
1949 DR_GROUP_SIZE (group2) = group2_size;
1950 for (stmt_info = group2; stmt_info;
1951 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1952 {
1953 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1954 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1955 }
1956
1957 /* For the second group, the DR_GROUP_GAP is that before the original group,
1958 plus skipping over the first vector. */
1959 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1960
1961 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1962 DR_GROUP_GAP (first_vinfo) += group2_size;
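/* For example, splitting a group of 6 with GROUP1_SIZE == 4 leaves a
   second group of 2 whose gap skips the 4 preceding elements, while
   the gap of the first group grows by 2 so that repeated accesses of
   the first group step over the second group's elements. */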
1963
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1966 group1_size, group2_size);
1967
1968 return group2;
1969 }
1970
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
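/* For example, NUNITS == 4 and GROUP_SIZE == 6 gives
   common_multiple (4, 6) == 12 and an unrolling factor of 2: two
   copies of the 6-stmt group fill three 4-element vectors. */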
1973
1974 static poly_uint64
1975 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1976 {
1977 return exact_div (common_multiple (nunits, group_size), group_size);
1978 }
1979
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1983
1984 static bool
1985 vect_analyze_slp_instance (vec_info *vinfo,
1986 stmt_vec_info stmt_info, unsigned max_tree_size)
1987 {
1988 slp_instance new_instance;
1989 slp_tree node;
1990 unsigned int group_size;
1991 tree vectype, scalar_type = NULL_TREE;
1992 unsigned int i;
1993 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1994 vec<stmt_vec_info> scalar_stmts;
1995 bool constructor = false;
1996
1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1998 {
1999 scalar_type = TREE_TYPE (DR_REF (dr));
2000 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
2001 group_size = DR_GROUP_SIZE (stmt_info);
2002 }
2003 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2004 {
2005 gcc_assert (is_a <loop_vec_info> (vinfo));
2006 vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 group_size = REDUC_GROUP_SIZE (stmt_info);
2008 }
2009 else if (is_gimple_assign (stmt_info->stmt)
2010 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2011 {
2012 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2013 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2014 constructor = true;
2015 }
2016 else
2017 {
2018 gcc_assert (is_a <loop_vec_info> (vinfo));
2019 vectype = STMT_VINFO_VECTYPE (stmt_info);
2020 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2021 }
2022
2023 if (!vectype)
2024 {
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2027 "Build SLP failed: unsupported data-type %T\n",
2028 scalar_type);
2029
2030 return false;
2031 }
2032 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2033
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts.create (group_size);
2036 stmt_vec_info next_info = stmt_info;
2037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2038 {
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2040 while (next_info)
2041 {
2042 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2043 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2044 }
2045 }
2046 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2047 {
2048 /* Collect the reduction stmts and store them in
2049 SLP_TREE_SCALAR_STMTS. */
2050 while (next_info)
2051 {
2052 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2053 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2054 }
2055 /* Mark the first element of the reduction chain as reduction to properly
2056 transform the node. In the reduction analysis phase only the last
2057 element of the chain is marked as reduction. */
2058 STMT_VINFO_DEF_TYPE (stmt_info)
2059 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2060 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2062 }
2063 else if (constructor)
2064 {
2065 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2066 tree val;
2067 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2068 {
2069 if (TREE_CODE (val) == SSA_NAME)
2070 {
2071 gimple* def = SSA_NAME_DEF_STMT (val);
2072 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2073 /* Value is defined in another basic block. */
2074 if (!def_info)
2075 return false;
2076 scalar_stmts.safe_push (def_info);
2077 }
2078 else
2079 return false;
2080 }
2081 }
2082 else
2083 {
2084 /* Collect reduction statements. */
2085 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2086 for (i = 0; reductions.iterate (i, &next_info); i++)
2087 scalar_stmts.safe_push (next_info);
2088 }
2089
2090 /* Build the tree for the SLP instance. */
2091 bool *matches = XALLOCAVEC (bool, group_size);
2092 unsigned npermutes = 0;
2093 scalar_stmts_to_slp_tree_map_t *bst_map
2094 = new scalar_stmts_to_slp_tree_map_t ();
2095 poly_uint64 max_nunits = nunits;
2096 unsigned tree_size = 0;
2097 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2098 &max_nunits, matches, &npermutes,
2099 &tree_size, bst_map);
2100 /* The map keeps a reference on SLP nodes built; release that. */
2101 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2102 it != bst_map->end (); ++it)
2103 if ((*it).second)
2104 vect_free_slp_tree ((*it).second, false);
2105 delete bst_map;
2106 if (node != NULL)
2107 {
2108 /* If this is a reduction chain with a conversion in front
2109 amend the SLP tree with a node for that. */
2110 if (!dr
2111 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2112 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2113 {
2114 /* Get at the conversion stmt - we know it's the single use
2115 of the last stmt of the reduction chain. */
2116 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2117 use_operand_p use_p;
2118 gimple *use_stmt;
2119 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2120 gcc_assert (r);
2121 next_info = vinfo->lookup_stmt (use_stmt);
2122 next_info = vect_stmt_to_vectorize (next_info);
2123 scalar_stmts = vNULL;
2124 scalar_stmts.create (group_size);
2125 for (unsigned i = 0; i < group_size; ++i)
2126 scalar_stmts.quick_push (next_info);
2127 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2128 SLP_TREE_CHILDREN (conv).quick_push (node);
2129 node = conv;
2130 /* We also have to fake this conversion stmt as an SLP reduction group
2131 so we don't have to mess with too much code elsewhere. */
2132 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2133 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2134 }
2135
2136 /* Calculate the unrolling factor based on the smallest type. */
2137 poly_uint64 unrolling_factor
2138 = calculate_unrolling_factor (max_nunits, group_size);
2139
2140 if (maybe_ne (unrolling_factor, 1U)
2141 && is_a <bb_vec_info> (vinfo))
2142 {
2143 unsigned HOST_WIDE_INT const_max_nunits;
2144 if (!max_nunits.is_constant (&const_max_nunits)
2145 || const_max_nunits > group_size)
2146 {
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
2152 vect_free_slp_tree (node, false);
2153 return false;
2154 }
2155 /* Fatal mismatch. */
2156 matches[group_size / const_max_nunits * const_max_nunits] = false;
2157 vect_free_slp_tree (node, false);
2158 }
2159 else
2160 {
2161 /* Create a new SLP instance. */
2162 new_instance = XNEW (class _slp_instance);
2163 SLP_INSTANCE_TREE (new_instance) = node;
2164 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2166 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2167 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2168
2169 vect_gather_slp_loads (new_instance, node);
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE, vect_location,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size, max_tree_size);
2174
2175 /* Compute the load permutation. */
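/* For example, a load node with scalar stmts { b[1], b[0] } drawn
   from the interleaving chain b[0], b[1] gets the load permutation
   { 1, 0 }. */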
2176 slp_tree load_node;
2177 bool loads_permuted = false;
2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2179 {
2180 vec<unsigned> load_permutation;
2181 int j;
2182 stmt_vec_info load_info;
2183 bool this_load_permuted = false;
2184 load_permutation.create (group_size);
2185 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2186 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2188 {
2189 int load_place = vect_get_place_in_interleaving_chain
2190 (load_info, first_stmt_info);
2191 gcc_assert (load_place != -1);
2192 if (load_place != j)
2193 this_load_permuted = true;
2194 load_permutation.safe_push (load_place);
2195 }
2196 if (!this_load_permuted
2197 /* The load requires permutation when unrolling exposes
2198 a gap either because the group is larger than the SLP
2199 group-size or because there is a gap between the groups. */
2200 && (known_eq (unrolling_factor, 1U)
2201 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2202 && DR_GROUP_GAP (first_stmt_info) == 0)))
2203 {
2204 load_permutation.release ();
2205 continue;
2206 }
2207 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2208 loads_permuted = true;
2209 }
2210
2211 if (loads_permuted)
2212 {
2213 if (!vect_supported_load_permutation_p (new_instance))
2214 {
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2217 "Build SLP failed: unsupported load "
2218 "permutation %G", stmt_info->stmt);
2219 vect_free_slp_instance (new_instance, false);
2220 return false;
2221 }
2222 }
2223
2224 /* If the loads and stores can be handled with load/store-lanes
2225 instructions, do not generate this SLP instance. */
2226 if (is_a <loop_vec_info> (vinfo)
2227 && loads_permuted
2228 && dr && vect_store_lanes_supported (vectype, group_size, false))
2229 {
2230 slp_tree load_node;
2231 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2232 {
2233 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2234 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2235 /* Use SLP for strided accesses (or if we can't load-lanes). */
2236 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2237 || ! vect_load_lanes_supported
2238 (STMT_VINFO_VECTYPE (stmt_vinfo),
2239 DR_GROUP_SIZE (stmt_vinfo), false))
2240 break;
2241 }
2242 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Built SLP cancelled: can use "
2247 "load/store-lanes\n");
2248 vect_free_slp_instance (new_instance, false);
2249 return false;
2250 }
2251 }
2252
2253 vinfo->slp_instances.safe_push (new_instance);
2254
2255 if (dump_enabled_p ())
2256 {
2257 dump_printf_loc (MSG_NOTE, vect_location,
2258 "Final SLP tree for instance:\n");
2259 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2260 }
2261
2262 return true;
2263 }
2264 }
2265 else
2266 {
2267 /* Failed to SLP. */
2268 /* Free the allocated memory. */
2269 scalar_stmts.release ();
2270 }
2271
2272 /* For basic block SLP, try to break the group up into multiples of the
2273 vector size. */
2274 unsigned HOST_WIDE_INT const_nunits;
2275 if (is_a <bb_vec_info> (vinfo)
2276 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2277 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2278 && nunits.is_constant (&const_nunits))
2279 {
2280 /* We consider breaking the group only on VF boundaries from the existing
2281 start. */
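/* For example, with GROUP_SIZE == 16, CONST_NUNITS == 4 and the
   first mismatch at i == 6 we split off the first 4 stmts, skip the
   vector containing the mismatch (elements 4 to 7) and retry with
   the remaining 8 stmts. */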
2282 for (i = 0; i < group_size; i++)
2283 if (!matches[i]) break;
2284
2285 if (i >= const_nunits && i < group_size)
2286 {
2287 /* Split into two groups at the first vector boundary before i. */
2288 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2289 unsigned group1_size = i & ~(const_nunits - 1);
2290
2291 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2292 group1_size);
2293 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2294 max_tree_size);
2295 /* If the first non-match was in the middle of a vector,
2296 skip the rest of that vector. */
2297 if (group1_size < i)
2298 {
2299 i = group1_size + const_nunits;
2300 if (i < group_size)
2301 rest = vect_split_slp_store_group (rest, const_nunits);
2302 }
2303 if (i < group_size)
2304 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2305 return res;
2306 }
2307 /* Even though the first vector did not all match, we might be able to SLP
2308 (some) of the remainder. FORNOW ignore this possibility. */
2309 }
2310
2311 return false;
2312 }
2313
2314
2315 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2316 trees of packed scalar stmts if SLP is possible. */
2317
2318 opt_result
2319 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2320 {
2321 unsigned int i;
2322 stmt_vec_info first_element;
2323
2324 DUMP_VECT_SCOPE ("vect_analyze_slp");
2325
2326 /* Find SLP sequences starting from groups of grouped stores. */
2327 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2328 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2329
2330 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2331 {
2332 if (loop_vinfo->reduction_chains.length () > 0)
2333 {
2334 /* Find SLP sequences starting from reduction chains. */
2335 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2336 if (! vect_analyze_slp_instance (vinfo, first_element,
2337 max_tree_size))
2338 {
2339 /* Dissolve reduction chain group. */
2340 stmt_vec_info vinfo = first_element;
2341 stmt_vec_info last = NULL;
2342 while (vinfo)
2343 {
2344 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2345 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2346 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2347 last = vinfo;
2348 vinfo = next;
2349 }
2350 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2351 /* It can be still vectorized as part of an SLP reduction. */
2352 loop_vinfo->reductions.safe_push (last);
2353 }
2354 }
2355
2356 /* Find SLP sequences starting from groups of reductions. */
2357 if (loop_vinfo->reductions.length () > 1)
2358 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2359 max_tree_size);
2360 }
2361
2362 return opt_result::success ();
2363 }
2364
2365
2366 /* For each possible SLP instance decide whether to SLP it and calculate the
2367 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2368 to SLP at least one instance. */
2369
2370 bool
2371 vect_make_slp_decision (loop_vec_info loop_vinfo)
2372 {
2373 unsigned int i;
2374 poly_uint64 unrolling_factor = 1;
2375 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2376 slp_instance instance;
2377 int decided_to_slp = 0;
2378
2379 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2380
2381 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2382 {
2383 /* FORNOW: SLP if you can. */
2384 /* All unroll factors have the form:
2385
2386 GET_MODE_SIZE (vinfo->vector_mode) * X
2387
2388 for some rational X, so they must have a common multiple. */
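/* For example, instances with unrolling factors 2 and 4 force an
   overall unrolling factor of 4 for the loop. */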
2389 unrolling_factor
2390 = force_common_multiple (unrolling_factor,
2391 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2392
2393 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2394 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2395 loop-based vectorization. Such stmts will be marked as HYBRID. */
2396 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2397 decided_to_slp++;
2398 }
2399
2400 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2401
2402 if (decided_to_slp && dump_enabled_p ())
2403 {
2404 dump_printf_loc (MSG_NOTE, vect_location,
2405 "Decided to SLP %d instances. Unrolling factor ",
2406 decided_to_slp);
2407 dump_dec (MSG_NOTE, unrolling_factor);
2408 dump_printf (MSG_NOTE, "\n");
2409 }
2410
2411 return (decided_to_slp > 0);
2412 }
2413
2414
2415 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2416 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
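/* For example, a stmt vectorized as part of an SLP instance whose
   scalar result is also used by a relevant stmt outside any SLP
   instance must be marked hybrid: it is then additionally handled by
   loop-based vectorization for that use. */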
2417
2418 static void
2419 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2420 hash_map<slp_tree, unsigned> &visited)
2421 {
2422 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2423 imm_use_iterator imm_iter;
2424 gimple *use_stmt;
2425 stmt_vec_info use_vinfo;
2426 slp_tree child;
2427 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2428 int j;
2429
2430 /* We need to union stype over the incoming graph edges but we still
2431 want to limit recursion to stay O(N+E). */
2432 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
2433
2434 /* Propagate hybrid down the SLP tree. */
2435 if (stype == hybrid)
2436 ;
2437 else if (HYBRID_SLP_STMT (stmt_vinfo))
2438 stype = hybrid;
2439 else if (!only_edge)
2440 {
2441 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2442 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2443 /* If we get a pattern stmt here we have to use the LHS of the
2444 original stmt for immediate uses. */
2445 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2446 tree def;
2447 if (gimple_code (stmt) == GIMPLE_PHI)
2448 def = gimple_phi_result (stmt);
2449 else
2450 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2451 if (def)
2452 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2453 {
2454 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2455 if (!use_vinfo)
2456 continue;
2457 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2458 if (!STMT_SLP_TYPE (use_vinfo)
2459 && (STMT_VINFO_RELEVANT (use_vinfo)
2460 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2461 && !(gimple_code (use_stmt) == GIMPLE_PHI
2462 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2463 {
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2466 "def in non-SLP stmt: %G", use_stmt);
2467 stype = hybrid;
2468 }
2469 }
2470 }
2471
2472 if (stype == hybrid
2473 && !HYBRID_SLP_STMT (stmt_vinfo))
2474 {
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2477 stmt_vinfo->stmt);
2478 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2479 }
2480
2481 if (!only_edge)
2482 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2483 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2484 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2485 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2486 }
2487
2488 static void
2489 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2490 {
2491 hash_map<slp_tree, unsigned> visited;
2492 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
2493 }
2494
2495 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2496
2497 static tree
2498 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2499 {
2500 walk_stmt_info *wi = (walk_stmt_info *)data;
2501 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2502
2503 if (wi->is_lhs)
2504 return NULL_TREE;
2505
2506 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2507 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2508 {
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2511 def_stmt_info->stmt);
2512 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2513 }
2514
2515 return NULL_TREE;
2516 }
2517
2518 static tree
2519 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2520 walk_stmt_info *wi)
2521 {
2522 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2523 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2524 /* If the stmt is in a SLP instance then this isn't a reason
2525 to mark use definitions in other SLP instances as hybrid. */
2526 if (! STMT_SLP_TYPE (use_vinfo)
2527 && (STMT_VINFO_RELEVANT (use_vinfo)
2528 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2529 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2530 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2531 ;
2532 else
2533 *handled = true;
2534 return NULL_TREE;
2535 }
2536
2537 /* Find stmts that must be both vectorized and SLPed. */
2538
2539 void
2540 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2541 {
2542 unsigned int i;
2543 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2544 slp_instance instance;
2545
2546 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2547
2548 /* First walk all pattern stmts in the loop and mark defs of uses as
2549 hybrid because immediate uses in them are not recorded. */
2550 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2551 {
2552 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2553 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2554 gsi_next (&gsi))
2555 {
2556 gimple *stmt = gsi_stmt (gsi);
2557 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2558 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2559 {
2560 walk_stmt_info wi;
2561 memset (&wi, 0, sizeof (wi));
2562 wi.info = loop_vinfo;
2563 gimple_stmt_iterator gsi2
2564 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2565 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2566 vect_detect_hybrid_slp_1, &wi);
2567 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2568 vect_detect_hybrid_slp_2,
2569 vect_detect_hybrid_slp_1, &wi);
2570 }
2571 }
2572 }
2573
2574 /* Then walk the SLP instance trees marking stmts with uses in
2575 non-SLP stmts as hybrid, also propagating hybrid down the
2576 SLP tree, collecting the above info on-the-fly. */
2577 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2578 {
2579 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2580 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2581 i, pure_slp);
2582 }
2583 }
2584
2585
2586 /* Initialize a bb_vec_info struct for the statements between
2587 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2588
2589 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2590 gimple_stmt_iterator region_end_in,
2591 vec_info_shared *shared)
2592 : vec_info (vec_info::bb, init_cost (NULL), shared),
2593 bb (gsi_bb (region_begin_in)),
2594 region_begin (region_begin_in),
2595 region_end (region_end_in)
2596 {
2597 gimple_stmt_iterator gsi;
2598
2599 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2600 gsi_next (&gsi))
2601 {
2602 gimple *stmt = gsi_stmt (gsi);
2603 gimple_set_uid (stmt, 0);
2604 add_stmt (stmt);
2605 }
2606
2607 bb->aux = this;
2608 }
2609
2610
2611 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2612 stmts in the basic block. */
2613
2614 _bb_vec_info::~_bb_vec_info ()
2615 {
2616 for (gimple_stmt_iterator si = region_begin;
2617 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2618 /* Reset region marker. */
2619 gimple_set_uid (gsi_stmt (si), -1);
2620
2621 bb->aux = NULL;
2622 }
2623
2624 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2625 given that child nodes have already been processed, and that
2626 their def types currently match their SLP node's def type. */
2627
2628 static bool
2629 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2630 slp_instance node_instance,
2631 stmt_vector_for_cost *cost_vec)
2632 {
2633 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2634 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2635
2636 /* For BB vectorization vector types are assigned here.
2637 Memory accesses already got their vector type assigned
2638 in vect_analyze_data_refs. */
2639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2640 if (bb_vinfo
2641 && ! STMT_VINFO_DATA_REF (stmt_info))
2642 {
2643 tree vectype, nunits_vectype;
2644 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2645 &nunits_vectype))
2646 /* We checked this when building the node. */
2647 gcc_unreachable ();
2648 if (vectype == boolean_type_node)
2649 {
2650 vectype = vect_get_mask_type_for_stmt (stmt_info);
2651 if (!vectype)
2652 /* vect_get_mask_type_for_stmt has already explained the
2653 failure. */
2654 return false;
2655 }
2656
2657 stmt_vec_info sstmt_info;
2658 unsigned int i;
2659 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2660 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2661 }
2662
2663 /* Calculate the number of vector statements to be created for the
2664 scalar stmts in this node. For SLP reductions it is equal to the
2665 number of vector statements in the children (which has already been
2666 calculated by the recursive call). Otherwise it is the number of
2667 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2668 VF divided by the number of elements in a vector. */
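/* For example, GROUP_SIZE == 4, VF == 2 and a 4-element vector type
   give 4 * 2 / 4 == 2 vector stmts. */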
2669 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2670 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2671 {
2672 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2673 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2674 {
2675 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2676 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2677 break;
2678 }
2679 }
2680 else
2681 {
2682 poly_uint64 vf;
2683 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2684 vf = loop_vinfo->vectorization_factor;
2685 else
2686 vf = 1;
2687 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2690 = vect_get_num_vectors (vf * group_size, vectype);
2691 }
2692
2693 bool dummy;
2694 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2695 }
2696
2697 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2698 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2699
2700 Return true if the operations are supported. */
2701
2702 static bool
2703 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2704 slp_instance node_instance,
2705 scalar_stmts_to_slp_tree_map_t *visited,
2706 scalar_stmts_to_slp_tree_map_t *lvisited,
2707 stmt_vector_for_cost *cost_vec)
2708 {
2709 int i, j;
2710 slp_tree child;
2711
2712 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2713 return true;
2714
2715 /* If we already analyzed the exact same set of scalar stmts we're done.
2716 We share the generated vector stmts for those. */
2717 slp_tree *leader;
2718 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2719 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2720 {
2721 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2722 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2723 return true;
2724 }
2725
2726 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2727 doesn't result in any issue since we throw away the lvisited set
2728 when we fail. */
2729 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2730
2731 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2732 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2733 visited, lvisited, cost_vec))
2734 return false;
2735
2736 /* ??? We have to catch the case late where two first scalar stmts appear
2737 in multiple SLP children with different def types and fail. Remember
2738 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2739 match it when that is vect_internal_def. */
2740 auto_vec<vect_def_type, 4> dt;
2741 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2742 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2743 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2744 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2745
2746 /* Push SLP node def-type to stmt operands. */
2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2748 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2749 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2750 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2751 = SLP_TREE_DEF_TYPE (child);
2752
2753 /* Check everything worked out. */
2754 bool res = true;
2755 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2756 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2757 {
2758 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2759 {
2760 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2761 != SLP_TREE_DEF_TYPE (child))
2762 res = false;
2763 }
2764 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2765 != dt[j])
2766 res = false;
2767 }
2768 if (!res && dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2770 "not vectorized: same operand with different "
2771 "def type in stmt.\n");
2772
2773 if (res)
2774 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2775 cost_vec);
2776
2777 /* Restore def-types. */
2778 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2779 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2780 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2781
2782 return res;
2783 }
2784
2785
2786 /* Analyze statements in SLP instances of VINFO. Return true if the
2787 operations are supported. */
2788
2789 bool
2790 vect_slp_analyze_operations (vec_info *vinfo)
2791 {
2792 slp_instance instance;
2793 int i;
2794
2795 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2796
2797 scalar_stmts_to_slp_tree_map_t *visited
2798 = new scalar_stmts_to_slp_tree_map_t ();
2799 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2800 {
2801 scalar_stmts_to_slp_tree_map_t lvisited;
2802 stmt_vector_for_cost cost_vec;
2803 cost_vec.create (2);
2804 if (!vect_slp_analyze_node_operations (vinfo,
2805 SLP_INSTANCE_TREE (instance),
2806 instance, visited, &lvisited,
2807 &cost_vec))
2808 {
2809 slp_tree node = SLP_INSTANCE_TREE (instance);
2810 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE, vect_location,
2813 "removing SLP instance operations starting from: %G",
2814 stmt_info->stmt);
2815 vect_free_slp_instance (instance, false);
2816 vinfo->slp_instances.ordered_remove (i);
2817 cost_vec.release ();
2818 }
2819 else
2820 {
2821 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2822 x != lvisited.end(); ++x)
2823 visited->put ((*x).first.copy (), (*x).second);
2824 i++;
2825
2826 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2827 cost_vec.release ();
2828 }
2829 }
2830 delete visited;
2831
2832 return !vinfo->slp_instances.is_empty ();
2833 }
2834
2835
2836 /* Compute the scalar cost of the SLP node NODE and its children
2837 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2838 update LIFE according to uses of NODE. */
2839
2840 static void
2841 vect_bb_slp_scalar_cost (basic_block bb,
2842 slp_tree node, vec<bool, va_heap> *life,
2843 stmt_vector_for_cost *cost_vec,
2844 hash_set<slp_tree> &visited)
2845 {
2846 unsigned i;
2847 stmt_vec_info stmt_info;
2848 slp_tree child;
2849
2850 if (visited.add (node))
2851 return;
2852
2853 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2854 {
2855 gimple *stmt = stmt_info->stmt;
2856 vec_info *vinfo = stmt_info->vinfo;
2857 ssa_op_iter op_iter;
2858 def_operand_p def_p;
2859
2860 if ((*life)[i])
2861 continue;
2862
2863 /* If there is a non-vectorized use of the defs then the scalar
2864 stmt is kept live in which case we do not account it or any
2865 required defs in the SLP children in the scalar cost. This
2866 way we make the vectorization more costly when compared to
2867 the scalar cost. */
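/* For example, if a def computed in the SLP tree also feeds a stmt
   that will not be vectorized, the scalar computation must stay
   live, so removing it is not a saving we may credit to
   vectorization. */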
2868 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2869 {
2870 imm_use_iterator use_iter;
2871 gimple *use_stmt;
2872 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2873 if (!is_gimple_debug (use_stmt))
2874 {
2875 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2876 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2877 {
2878 (*life)[i] = true;
2879 BREAK_FROM_IMM_USE_STMT (use_iter);
2880 }
2881 }
2882 }
2883 if ((*life)[i])
2884 continue;
2885
2886 /* Count scalar stmts only once. */
2887 if (gimple_visited_p (stmt))
2888 continue;
2889 gimple_set_visited (stmt, true);
2890
2891 vect_cost_for_stmt kind;
2892 if (STMT_VINFO_DATA_REF (stmt_info))
2893 {
2894 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2895 kind = scalar_load;
2896 else
2897 kind = scalar_store;
2898 }
2899 else if (vect_nop_conversion_p (stmt_info))
2900 continue;
2901 else
2902 kind = scalar_stmt;
2903 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2904 }
2905
2906 auto_vec<bool, 20> subtree_life;
2907 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2908 {
2909 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2910 {
2911 /* Do not directly pass LIFE to the recursive call, copy it to
2912 confine changes in the callee to the current child/subtree. */
2913 subtree_life.safe_splice (*life);
2914 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2915 visited);
2916 subtree_life.truncate (0);
2917 }
2918 }
2919 }
2920
2921 static void
2922 vect_bb_slp_scalar_cost (basic_block bb,
2923 slp_tree node, vec<bool, va_heap> *life,
2924 stmt_vector_for_cost *cost_vec)
2925 {
2926 hash_set<slp_tree> visited;
2927 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2928 }
2929
2930 /* Check if vectorization of the basic block is profitable. */
2931
2932 static bool
2933 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2934 {
2935 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2936 slp_instance instance;
2937 int i;
2938 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2939 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2940
2941 /* Calculate scalar cost. */
2942 stmt_vector_for_cost scalar_costs;
2943 scalar_costs.create (0);
2944 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2945 {
2946 auto_vec<bool, 20> life;
2947 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2948 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2949 SLP_INSTANCE_TREE (instance),
2950 &life, &scalar_costs);
2951 }
2952 void *target_cost_data = init_cost (NULL);
2953 add_stmt_costs (target_cost_data, &scalar_costs);
2954 scalar_costs.release ();
2955 unsigned dummy;
2956 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2957 destroy_cost_data (target_cost_data);
2958
2959 /* Unset visited flag. */
2960 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2961 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2962 gimple_set_visited (gsi_stmt (gsi), false);
2963
2964 /* Complete the target-specific cost calculation. */
2965 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2966 &vec_inside_cost, &vec_epilogue_cost);
2967
2968 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2969
2970 if (dump_enabled_p ())
2971 {
2972 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2973 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2974 vec_inside_cost);
2975 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2976 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2977 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2978 }
2979
2980 /* Vectorization is profitable if its cost is no more than the cost of the
2981 scalar version. Note that we err on the vector side for equal cost because
2982 the cost estimate is otherwise quite pessimistic (constant uses are
2983 free on the scalar side but cost a load on the vector side for
2984 example). */
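/* For example, vec_outside_cost == 2 and vec_inside_cost == 4 against
   scalar_cost == 6 is accepted (equal cost goes to the vector side),
   while scalar_cost == 5 is rejected. */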
2985 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2986 return false;
2987
2988 return true;
2989 }
2990
2991 /* Find any vectorizable constructors and add them to the grouped_stores
2992 array. */
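/* For example, v_1 = {a_2, b_3, c_4, d_5} with a V4SI lhs is recorded
   and later treated as the root of an SLP instance whose group
   consists of the defs of the four elements. */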
2993
2994 static void
2995 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
2996 {
2997 gimple_stmt_iterator gsi;
2998
2999 for (gsi = bb_vinfo->region_begin;
3000 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3001 {
3002 gimple *stmt = gsi_stmt (gsi);
3003
3004 if (is_gimple_assign (stmt)
3005 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
3006 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
3007 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
3008 {
3009 tree rhs = gimple_assign_rhs1 (stmt);
3010
3011 if (CONSTRUCTOR_NELTS (rhs) == 0)
3012 continue;
3013
3014 poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
3015
3016 if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
3017 continue;
3018
3019 if (dump_enabled_p ())
3020 dump_printf_loc (MSG_NOTE, vect_location,
3021 "Found vectorizable constructor: %G\n", stmt);
3022 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3023 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3024 }
3025 }
3026 }
3027
3028 /* Check if the region described by BB_VINFO can be vectorized, returning
3029 true if so. When returning false, set FATAL to true if the same failure
3030 would prevent vectorization at other vector sizes, false if it is still
3031 worth trying other sizes. N_STMTS is the number of statements in the
3032 region. */
3033
3034 static bool
3035 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3036 {
3037 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3038
3039 slp_instance instance;
3040 int i;
3041 poly_uint64 min_vf = 2;
3042
3043 /* The first group of checks is independent of the vector size. */
3044 fatal = true;
3045
3046 /* Analyze the data references. */
3047
3048 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3049 {
3050 if (dump_enabled_p ())
3051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3052 "not vectorized: unhandled data-ref in basic "
3053 "block.\n");
3054 return false;
3055 }
3056
3057 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3058 {
3059 if (dump_enabled_p ())
3060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3061 "not vectorized: not enough data-refs in "
3062 "basic block.\n");
3063 return false;
3064 }
3065
3066 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3067 {
3068 if (dump_enabled_p ())
3069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3070 "not vectorized: unhandled data access in "
3071 "basic block.\n");
3072 return false;
3073 }
3074
3075 vect_slp_check_for_constructors (bb_vinfo);
3076
3077 /* If there are no grouped stores in the region there is no need
3078 to continue with pattern recog as vect_analyze_slp will fail
3079 anyway. */
3080 if (bb_vinfo->grouped_stores.is_empty ())
3081 {
3082 if (dump_enabled_p ())
3083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3084 "not vectorized: no grouped stores in "
3085 "basic block.\n");
3086 return false;
3087 }
3088
3089 /* The rest of the analysis below depends on the vector size in some way, so from here on failures are not fatal and other sizes are worth trying. */
3090 fatal = false;
3091
3092 vect_pattern_recog (bb_vinfo);
3093
3094 /* Check the SLP opportunities in the basic block, analyze and build SLP
3095 trees. */
3096 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3097 {
3098 if (dump_enabled_p ())
3099 {
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3101 "Failed to SLP the basic block.\n");
3102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3103 "not vectorized: failed to find SLP opportunities "
3104 "in basic block.\n");
3105 }
3106 return false;
3107 }
3108
3109 vect_record_base_alignments (bb_vinfo);
3110
3111 /* Analyze and verify the alignment of data references and the
3112 dependence in the SLP instances. */
3113 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3114 {
3115 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3116 || ! vect_slp_analyze_instance_dependence (instance))
3117 {
3118 slp_tree node = SLP_INSTANCE_TREE (instance);
3119 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3120 if (dump_enabled_p ())
3121 dump_printf_loc (MSG_NOTE, vect_location,
3122 "removing SLP instance operations starting from: %G",
3123 stmt_info->stmt);
3124 vect_free_slp_instance (instance, false);
3125 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3126 continue;
3127 }
3128
3129 /* Mark all the statements that we want to vectorize as pure SLP and
3130 relevant. */
3131 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3132 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3133 if (SLP_INSTANCE_ROOT_STMT (instance))
3134 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3135
3136 i++;
3137 }
3138 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3139 return false;
3140
3141 if (!vect_slp_analyze_operations (bb_vinfo))
3142 {
3143 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3145 "not vectorized: bad operation in basic block.\n");
3146 return false;
3147 }
3148
3149 /* Cost model: check if the vectorization is worthwhile. */
3150 if (!unlimited_cost_model (NULL)
3151 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3152 {
3153 if (dump_enabled_p ())
3154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3155 "not vectorized: vectorization is not "
3156 "profitable.\n");
3157 return false;
3158 }
3159
3160 if (dump_enabled_p ())
3161 dump_printf_loc (MSG_NOTE, vect_location,
3162 "Basic block will be vectorized using SLP\n");
3163 return true;
3164 }
3165
3166 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3167 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3168 on success. The region has N_STMTS statements and has the datarefs
3169 given by DATAREFS. */
3170
3171 static bool
3172 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3173 gimple_stmt_iterator region_end,
3174 vec<data_reference_p> datarefs,
3175 unsigned int n_stmts)
3176 {
3177 bb_vec_info bb_vinfo;
3178 auto_vector_modes vector_modes;
3179
3180 /* Autodetect first vector size we try. */
3181 machine_mode next_vector_mode = VOIDmode;
3182 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3183 unsigned int mode_i = 0;
3184
3185 vec_info_shared shared;
3186
3187 machine_mode autodetected_vector_mode = VOIDmode;
3188 while (1)
3189 {
3190 bool vectorized = false;
3191 bool fatal = false;
3192 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3193
3194 bool first_time_p = shared.datarefs.is_empty ();
3195 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3196 if (first_time_p)
3197 bb_vinfo->shared->save_datarefs ();
3198 else
3199 bb_vinfo->shared->check_datarefs ();
3200 bb_vinfo->vector_mode = next_vector_mode;
3201
3202 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3203 && dbg_cnt (vect_slp))
3204 {
3205 if (dump_enabled_p ())
3206 {
3207 dump_printf_loc (MSG_NOTE, vect_location,
3208 "***** Analysis succeeded with vector mode"
3209 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3210 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3211 }
3212
3213 bb_vinfo->shared->check_datarefs ();
3214 vect_schedule_slp (bb_vinfo);
3215
3216 unsigned HOST_WIDE_INT bytes;
3217 if (dump_enabled_p ())
3218 {
3219 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3220 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3221 "basic block part vectorized using %wu byte "
3222 "vectors\n", bytes);
3223 else
3224 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3225 "basic block part vectorized using variable "
3226 "length vectors\n");
3227 }
3228
3229 vectorized = true;
3230 }
3231 else
3232 {
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_NOTE, vect_location,
3235 "***** Analysis failed with vector mode %s\n",
3236 GET_MODE_NAME (bb_vinfo->vector_mode));
3237 }
3238
3239 if (mode_i == 0)
3240 autodetected_vector_mode = bb_vinfo->vector_mode;
3241
3242 if (!fatal)
3243 while (mode_i < vector_modes.length ()
3244 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3245 {
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE, vect_location,
3248 "***** The result for vector mode %s would"
3249 " be the same\n",
3250 GET_MODE_NAME (vector_modes[mode_i]));
3251 mode_i += 1;
3252 }
3253
3254 delete bb_vinfo;
3255
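/* Skip a candidate mode if it and the autodetected mode are the
   related vector modes of each other's element modes, e.g. two
   16-byte modes such as V16QI and V8HI: they have the same size, so
   the analysis would make the same choices again. */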
3256 if (mode_i < vector_modes.length ()
3257 && VECTOR_MODE_P (autodetected_vector_mode)
3258 && (related_vector_mode (vector_modes[mode_i],
3259 GET_MODE_INNER (autodetected_vector_mode))
3260 == autodetected_vector_mode)
3261 && (related_vector_mode (autodetected_vector_mode,
3262 GET_MODE_INNER (vector_modes[mode_i]))
3263 == vector_modes[mode_i]))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_NOTE, vect_location,
3267 "***** Skipping vector mode %s, which would"
3268 " repeat the analysis for %s\n",
3269 GET_MODE_NAME (vector_modes[mode_i]),
3270 GET_MODE_NAME (autodetected_vector_mode));
3271 mode_i += 1;
3272 }
3273
3274 if (vectorized
3275 || mode_i == vector_modes.length ()
3276 || autodetected_vector_mode == VOIDmode
3277 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3278 vector sizes will fail do not bother iterating. */
3279 || fatal)
3280 return vectorized;
3281
3282 /* Try the next biggest vector size. */
3283 next_vector_mode = vector_modes[mode_i++];
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_NOTE, vect_location,
3286 "***** Re-trying analysis with vector mode %s\n",
3287 GET_MODE_NAME (next_vector_mode));
3288 }
3289 }
3290
3291 /* Main entry for the BB vectorizer. Analyze and transform BB, returning
3292 true if anything in the basic-block was vectorized. */
3293
3294 bool
3295 vect_slp_bb (basic_block bb)
3296 {
3297 gimple_stmt_iterator gsi;
3298 bool any_vectorized = false;
3299
3300 gsi = gsi_start_bb (bb);
3301 while (!gsi_end_p (gsi))
3302 {
3303 gimple_stmt_iterator region_begin = gsi;
3304 vec<data_reference_p> datarefs = vNULL;
3305 int insns = 0;
3306
3307 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3308 {
3309 gimple *stmt = gsi_stmt (gsi);
3310 if (is_gimple_debug (stmt))
3311 continue;
3312 insns++;
3313
3314 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3315 vect_location = stmt;
3316
3317 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3318 break;
3319 }
3320
3321 /* Skip leading unhandled stmts. */
3322 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3323 {
3324 gsi_next (&gsi);
3325 continue;
3326 }
3327
3328 gimple_stmt_iterator region_end = gsi;
3329
3330 if (insns > param_slp_max_insns_in_bb)
3331 {
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3334 "not vectorized: too many instructions in "
3335 "basic block.\n");
3336 }
3337 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3338 any_vectorized = true;
3339
3340 if (gsi_end_p (region_end))
3341 break;
3342
3343 /* Skip the unhandled stmt. */
3344 gsi_next (&gsi);
3345 }
3346
3347 return any_vectorized;
3348 }
3349
3350
3351 /* Return true if the vector type for a constant operand of STMT_VINFO should be a boolean vector. */
3352
3353 static bool
3354 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
3355 {
3356 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3357 tree op, vectype;
3358 enum vect_def_type dt;
3359
3360 /* For comparisons and COND_EXPRs the type is chosen depending
3361 on the other, non-constant comparison operand. */
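/* For example, in _1 = x_2 > cst_3 the constant is a mask operand
   only if the vector type chosen for x_2 is itself a boolean
   vector. */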
3362 if (TREE_CODE_CLASS (code) == tcc_comparison)
3363 {
3364 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3365 op = gimple_assign_rhs1 (stmt);
3366
3367 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3368 gcc_unreachable ();
3369
3370 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3371 }
3372
3373 if (code == COND_EXPR)
3374 {
3375 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3376 tree cond = gimple_assign_rhs1 (stmt);
3377
3378 if (TREE_CODE (cond) == SSA_NAME)
3379 op = cond;
3380 else
3381 op = TREE_OPERAND (cond, 0);
3382
3383 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3384 gcc_unreachable ();
3385
3386 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3387 }
3388
3389 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3390 }
3391
3392 /* Build a variable-length vector in which the elements in ELTS are repeated
3393 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3394 RESULTS and add any new instructions to SEQ.
3395
3396 The approach we use is:
3397
3398 (1) Find a vector mode VM with integer elements of mode IM.
3399
3400 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3401 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3402 from small vectors to IM.
3403
3404 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3405
3406 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3407 correct byte contents.
3408
3409 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3410
3411 We try to find the largest IM for which this sequence works, in order
3412 to cut down on the number of interleaves. */
3413
3414 void
3415 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3416 vec<tree> elts, unsigned int nresults,
3417 vec<tree> &results)
3418 {
3419 unsigned int nelts = elts.length ();
3420 tree element_type = TREE_TYPE (vector_type);
3421
3422 /* (1) Find a vector mode VM with integer elements of mode IM. */
3423 unsigned int nvectors = 1;
3424 tree new_vector_type;
3425 tree permutes[2];
3426 if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
3427 &nvectors, &new_vector_type,
3428 permutes))
3429 gcc_unreachable ();
3430
3431 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3432 unsigned int partial_nelts = nelts / nvectors;
3433 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3434
3435 tree_vector_builder partial_elts;
3436 auto_vec<tree, 32> pieces (nvectors * 2);
3437 pieces.quick_grow (nvectors * 2);
3438 for (unsigned int i = 0; i < nvectors; ++i)
3439 {
3440 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3441 ELTS' has mode IM. */
3442 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3443 for (unsigned int j = 0; j < partial_nelts; ++j)
3444 partial_elts.quick_push (elts[i * partial_nelts + j]);
3445 tree t = gimple_build_vector (seq, &partial_elts);
3446 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3447 TREE_TYPE (new_vector_type), t);
3448
3449 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3450 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3451 }
3452
3453 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3454 correct byte contents.
3455
3456 We need to repeat the following operation log2(nvectors) times:
3457
3458 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3459 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3460
3461 However, if each input repeats every N elements and the VF is
3462 a multiple of N * 2, the HI result is the same as the LO. */
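/* For example, with NVECTORS == 2 the inputs are the splats
   { A, A, ... } and { B, B, ... } of the two IM-mode chunks; one
   round of lo/hi permutes yields { A, B, A, B, ... }, which step (5)
   view-converts back to VECTOR_TYPE. */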
3463 unsigned int in_start = 0;
3464 unsigned int out_start = nvectors;
3465 unsigned int hi_start = nvectors / 2;
3466 /* A bound on the number of outputs needed to produce NRESULTS results
3467 in the final iteration. */
3468 unsigned int noutputs_bound = nvectors * nresults;
3469 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3470 {
3471 noutputs_bound /= 2;
3472 unsigned int limit = MIN (noutputs_bound, nvectors);
3473 for (unsigned int i = 0; i < limit; ++i)
3474 {
3475 if ((i & 1) != 0
3476 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3477 2 * in_repeat))
3478 {
3479 pieces[out_start + i] = pieces[out_start + i - 1];
3480 continue;
3481 }
3482
3483 tree output = make_ssa_name (new_vector_type);
3484 tree input1 = pieces[in_start + (i / 2)];
3485 tree input2 = pieces[in_start + (i / 2) + hi_start];
3486 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3487 input1, input2,
3488 permutes[i & 1]);
3489 gimple_seq_add_stmt (seq, stmt);
3490 pieces[out_start + i] = output;
3491 }
3492 std::swap (in_start, out_start);
3493 }
3494
3495 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3496 results.reserve (nresults);
3497 for (unsigned int i = 0; i < nresults; ++i)
3498 if (i < nvectors)
3499 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3500 pieces[in_start + i]));
3501 else
3502 results.quick_push (results[i - nvectors]);
3503 }
3504
3505
3506 /* For constant and loop-invariant defs of SLP_NODE this function returns
3507 the (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3508 OP_NODE is the node that holds the scalar operands from which the
3509 vectors are built. */
3510
3511 static void
3512 vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
3513 vec<tree> *vec_oprnds)
3514 {
3515 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3516 vec_info *vinfo = stmt_vinfo->vinfo;
3517 unsigned HOST_WIDE_INT nunits;
3518 tree vec_cst;
3519 unsigned j, number_of_places_left_in_vector;
3520 tree vector_type;
3521 tree vop;
3522 int group_size = op_node->ops.length ();
3523 unsigned int vec_num, i;
3524 unsigned number_of_copies = 1;
3525 bool constant_p;
3526 tree neutral_op = NULL;
3527 gimple_seq ctor_seq = NULL;
3528 auto_vec<tree, 16> permute_results;
3529
3530 /* ??? SLP analysis should compute the vector type for the
3531 constant / invariant and store it in the SLP node. */
3532 tree op = op_node->ops[0];
3533 /* Check if vector type is a boolean vector. */
3534 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3535 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3536 && vect_mask_constant_operand_p (stmt_vinfo))
3537 vector_type = truth_type_for (stmt_vectype);
3538 else
3539 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
3540
3541 /* ??? For lane-reducing ops we should also have the required number
3542 of vector stmts initialized rather than second-guessing here. */
3543 unsigned int number_of_vectors;
3544 if (is_gimple_assign (stmt_vinfo->stmt)
3545 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3546 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3547 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3548 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3549 else
3550 number_of_vectors
3551 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3552 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3553 vector_type);
3554 vec_oprnds->create (number_of_vectors);
3555 auto_vec<tree> voprnds (number_of_vectors);
3556
3557 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3558 the created vectors. It is greater than 1 if unrolling is performed.
3559
3560 For example, we have two scalar operands, s1 and s2 (e.g., a group of
3561 strided accesses of size two), while NUNITS is four (i.e., four scalars
3562 of this type can be packed in a vector). The output vector will contain
3563 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3564 will be 2).
3565
3566 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3567 containing the operands.
3568
3569 For example, NUNITS is four as before, and the group size is 8
3570 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3571 {s5, s6, s7, s8}. */
3572
3573 /* When using duplicate_and_interleave, we just need one element for
3574 each scalar statement. */
3575 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3576 nunits = group_size;
3577
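/* E.g., with NUNITS == 4, GROUP_SIZE == 2 and a single vector to create,
   this gives NUMBER_OF_COPIES == 2, matching the {s1, s2, s1, s2}
   example above.  */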
3578 number_of_copies = nunits * number_of_vectors / group_size;
3579
3580 number_of_places_left_in_vector = nunits;
3581 constant_p = true;
3582 tree_vector_builder elts (vector_type, nunits, 1);
3583 elts.quick_grow (nunits);
3584 bool place_after_defs = false;
3585 for (j = 0; j < number_of_copies; j++)
3586 {
3587 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3588 {
3589 /* Create 'vect_ = {op0,op1,...,opn}'. */
3590 number_of_places_left_in_vector--;
3591 tree orig_op = op;
3592 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3593 {
3594 if (CONSTANT_CLASS_P (op))
3595 {
3596 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3597 {
3598 /* Can't use VIEW_CONVERT_EXPR for booleans because
3599 of possibly different sizes of scalar value and
3600 vector element. */
3601 if (integer_zerop (op))
3602 op = build_int_cst (TREE_TYPE (vector_type), 0);
3603 else if (integer_onep (op))
3604 op = build_all_ones_cst (TREE_TYPE (vector_type));
3605 else
3606 gcc_unreachable ();
3607 }
3608 else
3609 op = fold_unary (VIEW_CONVERT_EXPR,
3610 TREE_TYPE (vector_type), op);
3611 gcc_assert (op && CONSTANT_CLASS_P (op));
3612 }
3613 else
3614 {
3615 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3616 gimple *init_stmt;
3617 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3618 {
3619 tree true_val
3620 = build_all_ones_cst (TREE_TYPE (vector_type));
3621 tree false_val
3622 = build_zero_cst (TREE_TYPE (vector_type));
3623 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3624 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3625 op, true_val,
3626 false_val);
3627 }
3628 else
3629 {
3630 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3631 op);
3632 init_stmt
3633 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3634 op);
3635 }
3636 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3637 op = new_temp;
3638 }
3639 }
3640 elts[number_of_places_left_in_vector] = op;
3641 if (!CONSTANT_CLASS_P (op))
3642 constant_p = false;
3643 if (TREE_CODE (orig_op) == SSA_NAME
3644 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3645 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3646 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3647 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3648 place_after_defs = true;
3649
3650 if (number_of_places_left_in_vector == 0)
3651 {
3652 if (constant_p
3653 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3654 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3655 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3656 else
3657 {
3658 if (permute_results.is_empty ())
3659 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3660 elts, number_of_vectors,
3661 permute_results);
3662 vec_cst = permute_results[number_of_vectors - j - 1];
3663 }
3664 tree init;
3665 gimple_stmt_iterator gsi;
3666 if (place_after_defs)
3667 {
3668 stmt_vec_info last_stmt_info
3669 = vect_find_last_scalar_stmt_in_slp (slp_node);
3670 gsi = gsi_for_stmt (last_stmt_info->stmt);
3671 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3672 &gsi);
3673 }
3674 else
3675 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3676 NULL);
3677 if (ctor_seq != NULL)
3678 {
3679 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3680 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3681 ctor_seq = NULL;
3682 }
3683 voprnds.quick_push (init);
3684 place_after_defs = false;
3685 number_of_places_left_in_vector = nunits;
3686 constant_p = true;
3687 elts.new_vector (vector_type, nunits, 1);
3688 elts.quick_grow (nunits);
3689 }
3690 }
3691 }
3692
3693 /* Since the vectors were created in reverse order, invert them by
3694 pushing them onto VEC_OPRNDS back to front. */
3695 vec_num = voprnds.length ();
3696 for (j = vec_num; j != 0; j--)
3697 {
3698 vop = voprnds[j - 1];
3699 vec_oprnds->quick_push (vop);
3700 }
3701
3702 /* If VF is greater than the unrolling factor needed for the SLP group of
3703 stmts, the number of vectors to be created is greater than
3704 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3705 to replicate the vectors. */
3706 while (number_of_vectors > vec_oprnds->length ())
3707 {
3708 tree neutral_vec = NULL;
3709
3710 if (neutral_op)
3711 {
3712 if (!neutral_vec)
3713 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3714
3715 vec_oprnds->quick_push (neutral_vec);
3716 }
3717 else
3718 {
3719 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3720 vec_oprnds->quick_push (vop);
3721 }
3722 }
3723 }
3724
3725
3726 /* Get the vectorized definitions from SLP_NODE, which must already contain
3727 the corresponding vectorized def-stmts. */
3728
3729 static void
3730 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3731 {
3732 stmt_vec_info vec_def_stmt_info;
3733 unsigned int i;
3734
3735 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3736
3737 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3738 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3739 }
3740
3741
3742 /* Get N vectorized definitions for SLP_NODE (N == -1U means one set of
3743 definitions for each child). If the scalar definitions are loop
3744 invariants or constants, collect them and call vect_get_constant_vectors()
3745 to create vector stmts. Otherwise, the def-stmts must already be
3746 vectorized and stored in the corresponding child of SLP_NODE; we call
3747 vect_get_slp_vect_defs () to retrieve them. */
3748
3749 void
3750 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3751 {
3752 if (n == -1U)
3753 n = SLP_TREE_CHILDREN (slp_node).length ();
3754
3755 for (unsigned i = 0; i < n; ++i)
3756 {
3757 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3758
3759 vec<tree> vec_defs = vNULL;
3760
3761 /* For each operand we check whether it has vectorized definitions in a
3762 child node or whether we need to create them (for invariants and constants). */
3763 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3764 {
3765 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3766 vect_get_slp_vect_defs (child, &vec_defs);
3767 }
3768 else
3769 vect_get_constant_vectors (child, slp_node, &vec_defs);
3770
3771 vec_oprnds->quick_push (vec_defs);
3772 }
3773 }
3774
3775 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3776 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3777 permute statements for the SLP node NODE of the SLP instance
3778 SLP_NODE_INSTANCE. */
3779
3780 bool
3781 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3782 gimple_stmt_iterator *gsi, poly_uint64 vf,
3783 slp_instance slp_node_instance, bool analyze_only,
3784 unsigned *n_perms)
3785 {
3786 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3787 vec_info *vinfo = stmt_info->vinfo;
3788 int vec_index = 0;
3789 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3790 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3791 unsigned int mask_element;
3792 machine_mode mode;
3793
3794 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3795 return false;
3796
3797 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3798
3799 mode = TYPE_MODE (vectype);
3800 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3801
3802 /* Initialize the vect stmts of NODE so that the generated stmts can be
3803 inserted in the right place later. */
3804 if (! analyze_only)
3805 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3806 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3807 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3808
3809 /* Generate permutation masks for every NODE. The number of masks for each
3810 NODE is equal to GROUP_SIZE.
3811 E.g., we have a group of three nodes with three loads from the same
3812 location in each node, and the vector size is 4. I.e., we have an
3813 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3814 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3815 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3816 ...
3817
3818 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3819 The last mask is illegal since we assume two operands for a permute
3820 operation, and the mask element values can't be outside that range.
3821 Hence, the last mask must be converted into {2,5,5,5}.
3822 For the first two permutations we need the first and the second input
3823 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3824 we need the second and the third vectors: {b1,c1,a2,b2} and
3825 {c2,a3,b3,c3}. */
3826
3827 int vect_stmts_counter = 0;
3828 unsigned int index = 0;
3829 int first_vec_index = -1;
3830 int second_vec_index = -1;
3831 bool noop_p = true;
3832 *n_perms = 0;
3833
3834 vec_perm_builder mask;
3835 unsigned int nelts_to_build;
3836 unsigned int nvectors_per_build;
3837 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3838 && multiple_p (nunits, group_size));
3839 if (repeating_p)
3840 {
3841 /* A single vector contains a whole number of copies of the node, so:
3842 (a) all permutes can use the same mask; and
3843 (b) the permutes only need a single vector input. */
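/* Encode the mask as GROUP_SIZE interleaved patterns of three elements
   each; vec_perm_builder extends each pattern's series to however many
   elements the (possibly variable-length) vector provides.  */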
3844 mask.new_vector (nunits, group_size, 3);
3845 nelts_to_build = mask.encoded_nelts ();
3846 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3847 }
3848 else
3849 {
3850 /* We need to construct a separate mask for each vector statement. */
3851 unsigned HOST_WIDE_INT const_nunits, const_vf;
3852 if (!nunits.is_constant (&const_nunits)
3853 || !vf.is_constant (&const_vf))
3854 return false;
3855 mask.new_vector (const_nunits, const_nunits, 1);
3856 nelts_to_build = const_vf * group_size;
3857 nvectors_per_build = 1;
3858 }
3859
3860 unsigned int count = mask.encoded_nelts ();
3861 mask.quick_grow (count);
3862 vec_perm_indices indices;
3863
3864 for (unsigned int j = 0; j < nelts_to_build; j++)
3865 {
3866 unsigned int iter_num = j / group_size;
3867 unsigned int stmt_num = j % group_size;
3868 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3869 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3870 if (repeating_p)
3871 {
3872 first_vec_index = 0;
3873 mask_element = i;
3874 }
3875 else
3876 {
3877 /* Enforced before the loop when !repeating_p. */
3878 unsigned int const_nunits = nunits.to_constant ();
3879 vec_index = i / const_nunits;
3880 mask_element = i % const_nunits;
3881 if (vec_index == first_vec_index
3882 || first_vec_index == -1)
3883 {
3884 first_vec_index = vec_index;
3885 }
3886 else if (vec_index == second_vec_index
3887 || second_vec_index == -1)
3888 {
3889 second_vec_index = vec_index;
3890 mask_element += const_nunits;
3891 }
3892 else
3893 {
3894 if (dump_enabled_p ())
3895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3896 "permutation requires at "
3897 "least three vectors %G",
3898 stmt_info->stmt);
3899 gcc_assert (analyze_only);
3900 return false;
3901 }
3902
3903 gcc_assert (mask_element < 2 * const_nunits);
3904 }
3905
3906 if (mask_element != index)
3907 noop_p = false;
3908 mask[index++] = mask_element;
3909
3910 if (index == count && !noop_p)
3911 {
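/* The mask for one batch of COUNT elements is complete; check that
   the target supports it as a constant permutation.  */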
3912 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3913 if (!can_vec_perm_const_p (mode, indices))
3914 {
3915 if (dump_enabled_p ())
3916 {
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3918 vect_location,
3919 "unsupported vect permute { ");
3920 for (i = 0; i < count; ++i)
3921 {
3922 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3923 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3924 }
3925 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3926 }
3927 gcc_assert (analyze_only);
3928 return false;
3929 }
3930
3931 ++*n_perms;
3932 }
3933
3934 if (index == count)
3935 {
3936 if (!analyze_only)
3937 {
3938 tree mask_vec = NULL_TREE;
3939
3940 if (! noop_p)
3941 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3942
3943 if (second_vec_index == -1)
3944 second_vec_index = first_vec_index;
3945
3946 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3947 {
3948 /* Generate the permute statement if necessary. */
3949 tree first_vec = dr_chain[first_vec_index + ri];
3950 tree second_vec = dr_chain[second_vec_index + ri];
3951 stmt_vec_info perm_stmt_info;
3952 if (! noop_p)
3953 {
3954 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3955 tree perm_dest
3956 = vect_create_destination_var (gimple_assign_lhs (stmt),
3957 vectype);
3958 perm_dest = make_ssa_name (perm_dest);
3959 gassign *perm_stmt
3960 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3961 first_vec, second_vec,
3962 mask_vec);
3963 perm_stmt_info
3964 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3965 gsi);
3966 }
3967 else
3968 /* If mask was NULL_TREE generate the requested
3969 identity transform. */
3970 perm_stmt_info = vinfo->lookup_def (first_vec);
3971
3972 /* Store the vector statement in NODE. */
3973 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3974 = perm_stmt_info;
3975 }
3976 }
3977
3978 index = 0;
3979 first_vec_index = -1;
3980 second_vec_index = -1;
3981 noop_p = true;
3982 }
3983 }
3984
3985 return true;
3986 }
3987
3988 /* Vectorize SLP instance tree in postorder. */
3989
3990 static void
3991 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3992 scalar_stmts_to_slp_tree_map_t *bst_map)
3993 {
3994 gimple_stmt_iterator si;
3995 stmt_vec_info stmt_info;
3996 unsigned int group_size;
3997 tree vectype;
3998 int i, j;
3999 slp_tree child;
4000
4001 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4002 return;
4003
4004 /* See if we have already vectorized the node in the graph of the
4005 SLP instance. */
4006 if (SLP_TREE_VEC_STMTS (node).exists ())
4007 return;
4008
4009 /* See if we have already vectorized the same set of stmts, and if so
4010 reuse their vectorized stmts across instances. */
4011 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
4012 {
4013 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
4014 return;
4015 }
4016
4017 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
4018 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4019 vect_schedule_slp_instance (child, instance, bst_map);
4020
4021 /* Push SLP node def-type to stmts. */
4022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4023 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4024 {
4025 stmt_vec_info child_stmt_info;
4026 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4027 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4028 }
4029
4030 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4031
4032 /* VECTYPE is the type of the destination. */
4033 vectype = STMT_VINFO_VECTYPE (stmt_info);
4034 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4035 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4036
4037 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4038 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4039
4040 if (dump_enabled_p ())
4041 dump_printf_loc (MSG_NOTE, vect_location,
4042 "------>vectorizing SLP node starting from: %G",
4043 stmt_info->stmt);
4044
4045 /* Vectorized stmts go before the last scalar stmt, which is where
4046 all uses are ready. */
4047 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4048 si = gsi_for_stmt (last_stmt_info->stmt);
4049
4050 /* Handle two-operation SLP nodes by vectorizing the group with
4051 both operations and then performing a merge. */
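/* E.g., for a group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } we vectorize
   the whole group once as an addition and once as a subtraction and then
   blend the two result vectors with a VEC_PERM_EXPR whose mask selects
   the even lanes from the addition and the odd lanes from the
   subtraction.  */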
4052 if (SLP_TREE_TWO_OPERATORS (node))
4053 {
4054 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4055 enum tree_code code0 = gimple_assign_rhs_code (stmt);
4056 enum tree_code ocode = ERROR_MARK;
4057 stmt_vec_info ostmt_info;
4058 vec_perm_builder mask (group_size, group_size, 1);
4059 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4060 {
4061 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4062 if (gimple_assign_rhs_code (ostmt) != code0)
4063 {
4064 mask.quick_push (1);
4065 ocode = gimple_assign_rhs_code (ostmt);
4066 }
4067 else
4068 mask.quick_push (0);
4069 }
4070 if (ocode != ERROR_MARK)
4071 {
4072 vec<stmt_vec_info> v0;
4073 vec<stmt_vec_info> v1;
4074 unsigned j;
4075 tree tmask = NULL_TREE;
4076 vect_transform_stmt (stmt_info, &si, node, instance);
4077 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4078 SLP_TREE_VEC_STMTS (node).truncate (0);
4079 gimple_assign_set_rhs_code (stmt, ocode);
4080 vect_transform_stmt (stmt_info, &si, node, instance);
4081 gimple_assign_set_rhs_code (stmt, code0);
4082 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4083 SLP_TREE_VEC_STMTS (node).truncate (0);
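/* Build an integer vector type with the same element width and lane
   count as VECTYPE to hold the constant blend mask.  */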
4084 tree meltype = build_nonstandard_integer_type
4085 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4086 tree mvectype = get_same_sized_vectype (meltype, vectype);
4087 unsigned k = 0, l;
4088 for (j = 0; j < v0.length (); ++j)
4089 {
4090 /* Enforced by vect_build_slp_tree, which rejects variable-length
4091 vectors for SLP_TREE_TWO_OPERATORS. */
4092 unsigned int const_nunits = nunits.to_constant ();
4093 tree_vector_builder melts (mvectype, const_nunits, 1);
4094 for (l = 0; l < const_nunits; ++l)
4095 {
4096 if (k >= group_size)
4097 k = 0;
4098 tree t = build_int_cst (meltype,
4099 mask[k++] * const_nunits + l);
4100 melts.quick_push (t);
4101 }
4102 tmask = melts.build ();
4103
4104 /* ??? Not all targets support a VEC_PERM_EXPR with a
4105 constant mask that would translate to a vec_merge RTX
4106 (with their vec_perm_const_ok). We can either not
4107 vectorize in that case or let veclower do its job.
4108 Unfortunately that isn't too great and at least for
4109 plus/minus we'd eventually like to match targets'
4110 vector addsub instructions. */
4111 gimple *vstmt;
4112 vstmt = gimple_build_assign (make_ssa_name (vectype),
4113 VEC_PERM_EXPR,
4114 gimple_assign_lhs (v0[j]->stmt),
4115 gimple_assign_lhs (v1[j]->stmt),
4116 tmask);
4117 SLP_TREE_VEC_STMTS (node).quick_push
4118 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4119 }
4120 v0.release ();
4121 v1.release ();
4122 return;
4123 }
4124 }
4125 vect_transform_stmt (stmt_info, &si, node, instance);
4126
4127 /* Restore stmt def-types. */
4128 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4129 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4130 {
4131 stmt_vec_info child_stmt_info;
4132 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4133 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4134 }
4135 }
4136
4137 /* Replace the scalar calls in SLP node NODE with assignments of zero to
4138 their lhs. For loop vectorization this is done in vectorizable_call,
4139 but for SLP it needs to be deferred until the end of vect_schedule_slp,
4140 because multiple SLP instances may refer to the same scalar stmt. */
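/* For instance, once the group containing "x_1 = sqrtf (y_2);" has been
   vectorized, the scalar call is rewritten here as "x_1 = 0.0f;" and the
   now-dead assignment is left for later cleanup.  */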
4141
4142 static void
4143 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4144 {
4145 gimple *new_stmt;
4146 gimple_stmt_iterator gsi;
4147 int i;
4148 slp_tree child;
4149 tree lhs;
4150 stmt_vec_info stmt_info;
4151
4152 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4153 return;
4154
4155 if (visited.add (node))
4156 return;
4157
4158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4159 vect_remove_slp_scalar_calls (child, visited);
4160
4161 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4162 {
4163 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4164 if (!stmt || gimple_bb (stmt) == NULL)
4165 continue;
4166 if (is_pattern_stmt_p (stmt_info)
4167 || !PURE_SLP_STMT (stmt_info))
4168 continue;
4169 lhs = gimple_call_lhs (stmt);
4170 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4171 gsi = gsi_for_stmt (stmt);
4172 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4173 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4174 }
4175 }
4176
4177 static void
4178 vect_remove_slp_scalar_calls (slp_tree node)
4179 {
4180 hash_set<slp_tree> visited;
4181 vect_remove_slp_scalar_calls (node, visited);
4182 }
4183
4184 /* Vectorize the instance root: replace INSTANCE's root stmt with its vectorized result from NODE. */
4185
4186 void
4187 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4188 {
4189 gassign *rstmt = NULL;
4190
4191 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4192 {
4193 stmt_vec_info child_stmt_info;
4194 int j;
4195
4196 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4197 {
4198 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4199 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4200 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4201 break;
4202 }
4203 }
4204 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4205 {
4206 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4207 stmt_vec_info child_stmt_info;
4208 int j;
4209 vec<constructor_elt, va_gc> *v;
4210 vec_alloc (v, nelts);
4211
4212 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4213 {
4214 CONSTRUCTOR_APPEND_ELT (v,
4215 NULL_TREE,
4216 gimple_get_lhs (child_stmt_info->stmt));
4217 }
4218 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4219 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4220 tree r_constructor = build_constructor (rtype, v);
4221 rstmt = gimple_build_assign (lhs, r_constructor);
4222 }
4223
4224 gcc_assert (rstmt);
4225
4226 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4227 gsi_replace (&rgsi, rstmt, true);
4228 }
4229
4230 /* Generate vector code for all SLP instances in the loop/basic block. */
4231
4232 void
4233 vect_schedule_slp (vec_info *vinfo)
4234 {
4235 vec<slp_instance> slp_instances;
4236 slp_instance instance;
4237 unsigned int i;
4238
4239 scalar_stmts_to_slp_tree_map_t *bst_map
4240 = new scalar_stmts_to_slp_tree_map_t ();
4241 slp_instances = vinfo->slp_instances;
4242 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4243 {
4244 slp_tree node = SLP_INSTANCE_TREE (instance);
4245 /* Schedule the tree of INSTANCE. */
4246 vect_schedule_slp_instance (node, instance, bst_map);
4247
4248 if (SLP_INSTANCE_ROOT_STMT (instance))
4249 vectorize_slp_instance_root_stmt (node, instance);
4250
4251 if (dump_enabled_p ())
4252 dump_printf_loc (MSG_NOTE, vect_location,
4253 "vectorizing stmts using SLP.\n");
4254 }
4255 delete bst_map;
4256
4257 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4258 {
4259 slp_tree root = SLP_INSTANCE_TREE (instance);
4260 stmt_vec_info store_info;
4261 unsigned int j;
4262
4263 /* Remove scalar call stmts. Do not do this for basic-block
4264 vectorization as not all uses may be vectorized.
4265 ??? Why should this be necessary? DCE should be able to
4266 remove the stmts itself.
4267 ??? For BB vectorization we can as well remove scalar
4268 stmts starting from the SLP tree root if they have no
4269 uses. */
4270 if (is_a <loop_vec_info> (vinfo))
4271 vect_remove_slp_scalar_calls (root);
4272
4273 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4274 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4275 {
4276 if (!STMT_VINFO_DATA_REF (store_info))
4277 break;
4278
4279 if (SLP_INSTANCE_ROOT_STMT (instance))
4280 continue;
4281
4282 store_info = vect_orig_stmt (store_info);
4283 /* Free the attached stmt_vec_info and remove the stmt. */
4284 vinfo->remove_stmt (store_info);
4285 }
4286 }
4287 }