1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
42 #include "dbgcnt.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47
48
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
52
53 static void
54 vect_free_slp_tree (slp_tree node, bool final_p)
55 {
56 int i;
57 slp_tree child;
58
59 if (--node->refcnt != 0)
60 return;
61
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
63 vect_free_slp_tree (child, final_p);
64
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
69 if (!final_p)
70 {
71 stmt_vec_info stmt_info;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
73 {
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
76 }
77 }
78
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
81 SLP_TREE_SCALAR_OPS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
83 SLP_TREE_LOAD_PERMUTATION (node).release ();
84
85 free (node);
86 }
87
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
91
92 void
93 vect_free_slp_instance (slp_instance instance, bool final_p)
94 {
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
96 SLP_INSTANCE_LOADS (instance).release ();
97 free (instance);
98 }
99
100
101 /* Create an SLP node for SCALAR_STMTS. */
102
103 static slp_tree
104 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
105 {
106 slp_tree node;
107 stmt_vec_info stmt_info = scalar_stmts[0];
108 unsigned int nops;
109
110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
118 else if (is_a <gphi *> (stmt_info->stmt))
119 nops = 0;
120 else
121 return NULL;
122
123 node = XNEW (struct _slp_tree);
124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
125 SLP_TREE_SCALAR_OPS (node) = vNULL;
126 SLP_TREE_VEC_STMTS (node).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
128 SLP_TREE_CHILDREN (node).create (nops);
129 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
130 SLP_TREE_TWO_OPERATORS (node) = false;
131 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
132 node->refcnt = 1;
133 node->max_nunits = 1;
134
135 unsigned i;
136 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
137 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
138
139 return node;
140 }
141
142 /* Create an SLP node for OPS. */
143
144 static slp_tree
145 vect_create_new_slp_node (vec<tree> ops)
146 {
147 slp_tree node;
148
149 node = XNEW (struct _slp_tree);
150 SLP_TREE_SCALAR_STMTS (node) = vNULL;
151 SLP_TREE_SCALAR_OPS (node) = ops;
152 SLP_TREE_VEC_STMTS (node).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
154 SLP_TREE_CHILDREN (node) = vNULL;
155 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
156 SLP_TREE_TWO_OPERATORS (node) = false;
157 SLP_TREE_DEF_TYPE (node) = vect_external_def;
158 node->refcnt = 1;
159 node->max_nunits = 1;
160
161 return node;
162 }
163
164
165 /* This structure is used during the creation of an SLP tree.  Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
167 node. */
168 typedef struct _slp_oprnd_info
169 {
170 /* Def-stmts for the operands. */
171 vec<stmt_vec_info> def_stmts;
172 /* Operands. */
173 vec<tree> ops;
174 /* Information about the first statement, its vector def-type, type, the
175      operand itself in case it's constant, and an indication of whether it's
176      a pattern stmt.  */
177 tree first_op_type;
178 enum vect_def_type first_dt;
179 bool any_pattern;
180 } *slp_oprnd_info;
181
182
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
184 operand. */
185 static vec<slp_oprnd_info>
186 vect_create_oprnd_info (int nops, int group_size)
187 {
188 int i;
189 slp_oprnd_info oprnd_info;
190 vec<slp_oprnd_info> oprnds_info;
191
192 oprnds_info.create (nops);
193 for (i = 0; i < nops; i++)
194 {
195 oprnd_info = XNEW (struct _slp_oprnd_info);
196 oprnd_info->def_stmts.create (group_size);
197 oprnd_info->ops.create (group_size);
198 oprnd_info->first_dt = vect_uninitialized_def;
199 oprnd_info->first_op_type = NULL_TREE;
200 oprnd_info->any_pattern = false;
201 oprnds_info.quick_push (oprnd_info);
202 }
203
204 return oprnds_info;
205 }
206
207
208 /* Free operands info. */
209
210 static void
211 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
212 {
213 int i;
214 slp_oprnd_info oprnd_info;
215
216 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
217 {
218 oprnd_info->def_stmts.release ();
219 oprnd_info->ops.release ();
220 XDELETE (oprnd_info);
221 }
222
223 oprnds_info.release ();
224 }
225
226
227 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
228 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
229 of the chain. */
230
231 int
232 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
233 stmt_vec_info first_stmt_info)
234 {
235 stmt_vec_info next_stmt_info = first_stmt_info;
236 int result = 0;
237
238 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
239 return -1;
240
241 do
242 {
243 if (next_stmt_info == stmt_info)
244 return result;
245 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
246 if (next_stmt_info)
247 result += DR_GROUP_GAP (next_stmt_info);
248 }
249 while (next_stmt_info);
250
251 return -1;
252 }
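
/* For example (assuming the usual DR_GROUP_GAP convention that a
   non-first member's gap is its distance from the previous member):
   for a load group accessing a[0], a[1] and a[3], the gaps of a[1]
   and a[3] are 1 and 2, so the place of the a[3] access is
   0 + 1 + 2 = 3.  */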
253
254 /* Check whether it is possible to load COUNT elements of type ELT_MODE
255 using the method implemented by duplicate_and_interleave. Return true
256 if so, returning the number of intermediate vectors in *NVECTORS_OUT
257 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
258 (if nonnull). */
259
260 bool
261 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
262 machine_mode elt_mode,
263 unsigned int *nvectors_out,
264 tree *vector_type_out,
265 tree *permutes)
266 {
267 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
268 poly_int64 nelts;
269 unsigned int nvectors = 1;
270 for (;;)
271 {
272 scalar_int_mode int_mode;
273 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
274 if (multiple_p (vinfo->vector_size, elt_bytes, &nelts)
275 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
276 {
277 tree int_type = build_nonstandard_integer_type
278 (GET_MODE_BITSIZE (int_mode), 1);
279 tree vector_type = build_vector_type (int_type, nelts);
280 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
281 {
282 vec_perm_builder sel1 (nelts, 2, 3);
283 vec_perm_builder sel2 (nelts, 2, 3);
284 poly_int64 half_nelts = exact_div (nelts, 2);
285 for (unsigned int i = 0; i < 3; ++i)
286 {
287 sel1.quick_push (i);
288 sel1.quick_push (i + nelts);
289 sel2.quick_push (half_nelts + i);
290 sel2.quick_push (half_nelts + i + nelts);
291 }
292 vec_perm_indices indices1 (sel1, 2, nelts);
293 vec_perm_indices indices2 (sel2, 2, nelts);
294 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
295 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
296 {
297 if (nvectors_out)
298 *nvectors_out = nvectors;
299 if (vector_type_out)
300 *vector_type_out = vector_type;
301 if (permutes)
302 {
303 permutes[0] = vect_gen_perm_mask_checked (vector_type,
304 indices1);
305 permutes[1] = vect_gen_perm_mask_checked (vector_type,
306 indices2);
307 }
308 return true;
309 }
310 }
311 }
312 if (!multiple_p (elt_bytes, 2, &elt_bytes))
313 return false;
314 nvectors *= 2;
315 }
316 }
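
/* For example (illustrative numbers only; the actual modes depend on
   the target): with a 16-byte vector size, COUNT == 2 and 4-byte
   elements, the first iteration tries ELT_BYTES == 8, i.e. each pair
   of scalars is treated as one 64-bit integer and two such integers
   fit in a vector.  If the required permutes are not supported,
   ELT_BYTES is halved and NVECTORS doubled, until ELT_BYTES can no
   longer be split.  */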
317
318 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
319    they are of a valid type and that they match the defs of the first stmt of
320    the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
321    by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
322    indicates that a swap is required for cond_expr stmts.  Specifically,
323    *SWAP is 1 if STMT is a cond_expr and the operands of its comparison
324    need to be swapped; *SWAP is 2 if STMT is a cond_expr and its comparison
325    code needs to be inverted.  If this function swaps any operands, it sets
326    *SWAP to a non-zero value.
327    Return -1 on a fatal error; return 1 if the error could be corrected by
328    swapping operands of this node's parent; return 0 if everything is
329    ok.  */
330 static int
331 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
332 vec<stmt_vec_info> stmts, unsigned stmt_num,
333 vec<slp_oprnd_info> *oprnds_info)
334 {
335 stmt_vec_info stmt_info = stmts[stmt_num];
336 tree oprnd;
337 unsigned int i, number_of_oprnds;
338 enum vect_def_type dt = vect_uninitialized_def;
339 slp_oprnd_info oprnd_info;
340 int first_op_idx = 1;
341 unsigned int commutative_op = -1U;
342 bool first_op_cond = false;
343 bool first = stmt_num == 0;
344
345 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
346 {
347 number_of_oprnds = gimple_call_num_args (stmt);
348 first_op_idx = 3;
349 if (gimple_call_internal_p (stmt))
350 {
351 internal_fn ifn = gimple_call_internal_fn (stmt);
352 commutative_op = first_commutative_argument (ifn);
353
354 /* Masked load, only look at mask. */
355 if (ifn == IFN_MASK_LOAD)
356 {
357 number_of_oprnds = 1;
358 /* Mask operand index. */
359 first_op_idx = 5;
360 }
361 }
362 }
363 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
364 {
365 enum tree_code code = gimple_assign_rhs_code (stmt);
366 number_of_oprnds = gimple_num_ops (stmt) - 1;
367 	  /* Swapping can only be done for cond_expr if asked to, otherwise we
368 	     could end up with a different comparison code from the first stmt.  */
369 if (code == COND_EXPR
370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
371 {
372 first_op_cond = true;
373 number_of_oprnds++;
374 }
375 else
376 commutative_op = commutative_tree_code (code) ? 0U : -1U;
377 }
378 else
379 return -1;
380
381 bool swapped = (*swap != 0);
382 gcc_assert (!swapped || first_op_cond);
383 for (i = 0; i < number_of_oprnds; i++)
384 {
385 again:
386 if (first_op_cond)
387 {
388 /* Map indicating how operands of cond_expr should be swapped. */
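	      /* Row *SWAP == 0 is the identity mapping, row 1 swaps the two
	         operands of the comparison, and row 2 swaps the then/else
	         arms (the comparison code itself is adjusted separately
	         below).  */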
389 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
390 int *map = maps[*swap];
391
392 if (i < 2)
393 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
394 first_op_idx), map[i]);
395 else
396 oprnd = gimple_op (stmt_info->stmt, map[i]);
397 }
398 else
399 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
400 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
401 oprnd = TREE_OPERAND (oprnd, 0);
402
403 oprnd_info = (*oprnds_info)[i];
404
405 stmt_vec_info def_stmt_info;
406 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
407 {
408 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
410 "Build SLP failed: can't analyze def for %T\n",
411 oprnd);
412
413 return -1;
414 }
415
416 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
417 oprnd_info->any_pattern = true;
418
419 if (first)
420 {
421 /* For the swapping logic below force vect_reduction_def
422 	     for the reduction op in an SLP reduction group.  */
423 if (!STMT_VINFO_DATA_REF (stmt_info)
424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
425 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
426 && def_stmt_info)
427 dt = vect_reduction_def;
428 oprnd_info->first_dt = dt;
429 oprnd_info->first_op_type = TREE_TYPE (oprnd);
430 }
431 else
432 {
433 	  /* Not the first stmt of the group, check that the def-stmt/s match
434 	     the def-stmt/s of the first stmt.  Allow different definition
435 	     types for reduction chains: the first stmt must be a
436 	     vect_reduction_def (a phi node), and the rest are part of
437 	     the reduction chain.  */
438 tree type = TREE_TYPE (oprnd);
439 if ((oprnd_info->first_dt != dt
440 && !(oprnd_info->first_dt == vect_reduction_def
441 && !STMT_VINFO_DATA_REF (stmt_info)
442 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
443 && def_stmt_info
444 && !STMT_VINFO_DATA_REF (def_stmt_info)
445 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
446 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
447 && !((oprnd_info->first_dt == vect_external_def
448 || oprnd_info->first_dt == vect_constant_def)
449 && (dt == vect_external_def
450 || dt == vect_constant_def)))
451 || !types_compatible_p (oprnd_info->first_op_type, type)
452 || (!STMT_VINFO_DATA_REF (stmt_info)
453 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
454 && ((!def_stmt_info
455 || STMT_VINFO_DATA_REF (def_stmt_info)
456 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
457 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
458 != (oprnd_info->first_dt != vect_reduction_def))))
459 {
460 /* Try swapping operands if we got a mismatch. */
461 if (i == commutative_op && !swapped)
462 {
463 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE, vect_location,
465 "trying swapped operands\n");
466 swapped = true;
467 goto again;
468 }
469
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472 "Build SLP failed: different types\n");
473
474 return 1;
475 }
476 if ((dt == vect_constant_def
477 || dt == vect_external_def)
478 && !vinfo->vector_size.is_constant ()
479 && (TREE_CODE (type) == BOOLEAN_TYPE
480 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
481 TYPE_MODE (type))))
482 {
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
485 "Build SLP failed: invalid type of def "
486 "for variable-length SLP %T\n", oprnd);
487 return -1;
488 }
489 }
490
491 /* Check the types of the definitions. */
492 switch (dt)
493 {
494 case vect_external_def:
495 /* Make sure to demote the overall operand to external. */
496 oprnd_info->first_dt = vect_external_def;
497 /* Fallthru. */
498 case vect_constant_def:
499 oprnd_info->def_stmts.quick_push (NULL);
500 oprnd_info->ops.quick_push (oprnd);
501 break;
502
503 case vect_internal_def:
504 case vect_reduction_def:
505 if (oprnd_info->first_dt == vect_reduction_def
506 && !STMT_VINFO_DATA_REF (stmt_info)
507 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
508 && !STMT_VINFO_DATA_REF (def_stmt_info)
509 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
510 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
511 {
512 	      /* For an SLP reduction chain we want to duplicate the
513 	         reduction to each of the chain members.  That gets
514 	         us a sane SLP graph (though the stmts are still not 100%
515 	         correct wrt the initial values).  */
516 gcc_assert (!first);
517 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
518 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
519 break;
520 }
521 /* Fallthru. */
522 case vect_induction_def:
523 oprnd_info->def_stmts.quick_push (def_stmt_info);
524 oprnd_info->ops.quick_push (oprnd);
525 break;
526
527 default:
528 /* FORNOW: Not supported. */
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: illegal type of def %T\n",
532 oprnd);
533
534 return -1;
535 }
536 }
537
538 /* Swap operands. */
539 if (swapped)
540 {
541 if (first_op_cond)
542 {
543 	  /* If there are already uses of this stmt in an SLP instance then
544 we've committed to the operand order and can't swap it. */
545 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
546 {
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549 "Build SLP failed: cannot swap operands of "
550 "shared stmt %G", stmt_info->stmt);
551 return -1;
552 }
553
554 /* To get rid of this swapping we have to move the stmt code
555 to the SLP tree as well (and gather it here per stmt). */
556 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
557 tree cond = gimple_assign_rhs1 (stmt);
558 enum tree_code code = TREE_CODE (cond);
559
560 /* Swap. */
561 if (*swap == 1)
562 {
563 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
564 &TREE_OPERAND (cond, 1));
565 TREE_SET_CODE (cond, swap_tree_comparison (code));
566 }
567 /* Invert. */
568 else
569 {
570 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
571 gimple_assign_rhs3_ptr (stmt));
572 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
573 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
574 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
575 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
576 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
577 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
578 gcc_assert (code != ERROR_MARK);
579 TREE_SET_CODE (cond, code);
580 }
581 }
582 else
583 {
584 /* Commutative ops need not reflect swapping, ops are in
585 the SLP tree. */
586 }
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE, vect_location,
589 "swapped operands to match def types in %G",
590 stmt_info->stmt);
591 }
592
593 *swap = swapped;
594 return 0;
595 }
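
/* For example, for the group { x0 = a0 + b0, x1 = b1 + a1 } where the
   a's are defined in the loop (vect_internal_def) and the b's are loop
   invariants (vect_external_def), stmt 1 initially mismatches stmt 0 on
   both operands; since PLUS_EXPR is commutative, the "trying swapped
   operands" path above retries stmt 1 with swapped operands and
   collects { a0, a1 } and { b0, b1 }.  */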
596
597 /* Return true if call statements CALL1 and CALL2 are similar enough
598 to be combined into the same SLP group. */
599
600 static bool
601 compatible_calls_p (gcall *call1, gcall *call2)
602 {
603 unsigned int nargs = gimple_call_num_args (call1);
604 if (nargs != gimple_call_num_args (call2))
605 return false;
606
607 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
608 return false;
609
610 if (gimple_call_internal_p (call1))
611 {
612 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
613 TREE_TYPE (gimple_call_lhs (call2))))
614 return false;
615 for (unsigned int i = 0; i < nargs; ++i)
616 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
617 TREE_TYPE (gimple_call_arg (call2, i))))
618 return false;
619 }
620 else
621 {
622 if (!operand_equal_p (gimple_call_fn (call1),
623 gimple_call_fn (call2), 0))
624 return false;
625
626 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
627 return false;
628 }
629 return true;
630 }
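
/* For example, two calls of __builtin_sqrtf on float arguments are
   compatible, whereas a __builtin_sqrtf call and a __builtin_sqrt call
   are not, since they map to different combined functions.  */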
631
632 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
633 caller's attempt to find the vector type in STMT_INFO with the narrowest
634 element type. Return true if VECTYPE is nonnull and if it is valid
635 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
636 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
637 vect_build_slp_tree. */
638
639 static bool
640 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
641 tree vectype, poly_uint64 *max_nunits)
642 {
643 if (!vectype)
644 {
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
647 "Build SLP failed: unsupported data-type in %G\n",
648 stmt_info->stmt);
649 /* Fatal mismatch. */
650 return false;
651 }
652
653 /* If populating the vector type requires unrolling then fail
654 before adjusting *max_nunits for basic-block vectorization. */
655 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
656 unsigned HOST_WIDE_INT const_nunits;
657 if (STMT_VINFO_BB_VINFO (stmt_info)
658 && (!nunits.is_constant (&const_nunits)
659 || const_nunits > group_size))
660 {
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
663 "Build SLP failed: unrolling required "
664 "in basic block SLP\n");
665 /* Fatal mismatch. */
666 return false;
667 }
668
669 /* In case of multiple types we need to detect the smallest type. */
670 vect_update_max_nunits (max_nunits, vectype);
671 return true;
672 }
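
/* For example, in basic-block SLP a group of two int stores cannot use
   a four-element int vector type: const_nunits (4) > group_size (2)
   would require unrolling, which has no meaning in a basic block, so
   the build fails; loop SLP instead records the requirement through
   *MAX_NUNITS and the unrolling factor computed from it.  */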
673
674 /* STMTS is a group of GROUP_SIZE SLP statements in which some
675 statements do the same operation as the first statement and in which
676 the others do ALT_STMT_CODE. Return true if we can take one vector
677 of the first operation and one vector of the second and permute them
678 to get the required result. VECTYPE is the type of the vector that
679 would be permuted. */
680
681 static bool
682 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
683 unsigned int group_size, tree vectype,
684 tree_code alt_stmt_code)
685 {
686 unsigned HOST_WIDE_INT count;
687 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
688 return false;
689
690 vec_perm_builder sel (count, count, 1);
691 for (unsigned int i = 0; i < count; ++i)
692 {
693 unsigned int elt = i;
694 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
695 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
696 elt += count;
697 sel.quick_push (elt);
698 }
699 vec_perm_indices indices (sel, 2, count);
700 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
701 }
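
/* For example, for the two-operator group { a0 + b0, a1 - b1 } and a
   four-element vector type, the mask built above is { 0, 5, 2, 7 }:
   even elements are selected from the vector of additions (the first
   permute input), odd elements from the vector of subtractions (the
   second input).  */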
702
703 /* Verify whether the scalar stmts STMTS are isomorphic, whether they
704    require data permutation, and whether they use supported operations.
705    Return true if they are, otherwise return false and indicate in
706    *MATCHES which stmts are not isomorphic to the first one.  If
707    MATCHES[0] is false then this indicates the comparison could not be
708    carried out or the stmts will never be vectorized by SLP.
709 
710    Note a COND_EXPR is possibly isomorphic to another one after swapping
711    its operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic
712    to the first stmt after swapping the two operands of its comparison;
713    set SWAP[i] to 2 if stmt I is isomorphic after inverting the comparison
714    code.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped to
715    B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */
716
717 static bool
718 vect_build_slp_tree_1 (unsigned char *swap,
719 vec<stmt_vec_info> stmts, unsigned int group_size,
720 poly_uint64 *max_nunits, bool *matches,
721 bool *two_operators)
722 {
723 unsigned int i;
724 stmt_vec_info first_stmt_info = stmts[0];
725 enum tree_code first_stmt_code = ERROR_MARK;
726 enum tree_code alt_stmt_code = ERROR_MARK;
727 enum tree_code rhs_code = ERROR_MARK;
728 enum tree_code first_cond_code = ERROR_MARK;
729 tree lhs;
730 bool need_same_oprnds = false;
731 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
732 optab optab;
733 int icode;
734 machine_mode optab_op2_mode;
735 machine_mode vec_mode;
736 stmt_vec_info first_load = NULL, prev_first_load = NULL;
737 bool load_p = false;
738
739 /* For every stmt in NODE find its def stmt/s. */
740 stmt_vec_info stmt_info;
741 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
742 {
743 gimple *stmt = stmt_info->stmt;
744 swap[i] = 0;
745 matches[i] = false;
746
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
749
750 /* Fail to vectorize statements marked as unvectorizable. */
751 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
752 {
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "Build SLP failed: unvectorizable statement %G",
756 stmt);
757 /* Fatal mismatch. */
758 matches[0] = false;
759 return false;
760 }
761
762 lhs = gimple_get_lhs (stmt);
763 if (lhs == NULL_TREE)
764 {
765 if (dump_enabled_p ())
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
767 "Build SLP failed: not GIMPLE_ASSIGN nor "
768 "GIMPLE_CALL %G", stmt);
769 /* Fatal mismatch. */
770 matches[0] = false;
771 return false;
772 }
773
774 tree nunits_vectype;
775 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
776 &nunits_vectype)
777 || (nunits_vectype
778 && !vect_record_max_nunits (stmt_info, group_size,
779 nunits_vectype, max_nunits)))
780 {
781 /* Fatal mismatch. */
782 matches[0] = false;
783 return false;
784 }
785
786 gcc_assert (vectype);
787
788 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
789 {
790 rhs_code = CALL_EXPR;
791
792 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
793 load_p = true;
794 else if ((gimple_call_internal_p (call_stmt)
795 && (!vectorizable_internal_fn_p
796 (gimple_call_internal_fn (call_stmt))))
797 || gimple_call_tail_p (call_stmt)
798 || gimple_call_noreturn_p (call_stmt)
799 || !gimple_call_nothrow_p (call_stmt)
800 || gimple_call_chain (call_stmt))
801 {
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
804 "Build SLP failed: unsupported call type %G",
805 call_stmt);
806 /* Fatal mismatch. */
807 matches[0] = false;
808 return false;
809 }
810 }
811 else
812 {
813 rhs_code = gimple_assign_rhs_code (stmt);
814 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
815 }
816
817 /* Check the operation. */
818 if (i == 0)
819 {
820 first_stmt_code = rhs_code;
821
822 /* Shift arguments should be equal in all the packed stmts for a
823 vector shift with scalar shift operand. */
824 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
825 || rhs_code == LROTATE_EXPR
826 || rhs_code == RROTATE_EXPR)
827 {
828 if (vectype == boolean_type_node)
829 {
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
832 "Build SLP failed: shift of a"
833 " boolean.\n");
834 /* Fatal mismatch. */
835 matches[0] = false;
836 return false;
837 }
838
839 vec_mode = TYPE_MODE (vectype);
840
841 /* First see if we have a vector/vector shift. */
842 optab = optab_for_tree_code (rhs_code, vectype,
843 optab_vector);
844
845 if (!optab
846 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
847 {
848 /* No vector/vector shift, try for a vector/scalar shift. */
849 optab = optab_for_tree_code (rhs_code, vectype,
850 optab_scalar);
851
852 if (!optab)
853 {
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
856 "Build SLP failed: no optab.\n");
857 /* Fatal mismatch. */
858 matches[0] = false;
859 return false;
860 }
861 icode = (int) optab_handler (optab, vec_mode);
862 if (icode == CODE_FOR_nothing)
863 {
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
866 "Build SLP failed: "
867 "op not supported by target.\n");
868 /* Fatal mismatch. */
869 matches[0] = false;
870 return false;
871 }
872 optab_op2_mode = insn_data[icode].operand[2].mode;
873 if (!VECTOR_MODE_P (optab_op2_mode))
874 {
875 need_same_oprnds = true;
876 first_op1 = gimple_assign_rhs2 (stmt);
877 }
878 }
879 }
880 else if (rhs_code == WIDEN_LSHIFT_EXPR)
881 {
882 need_same_oprnds = true;
883 first_op1 = gimple_assign_rhs2 (stmt);
884 }
885 }
886 else
887 {
888 if (first_stmt_code != rhs_code
889 && alt_stmt_code == ERROR_MARK)
890 alt_stmt_code = rhs_code;
891 if (first_stmt_code != rhs_code
892 && (first_stmt_code != IMAGPART_EXPR
893 || rhs_code != REALPART_EXPR)
894 && (first_stmt_code != REALPART_EXPR
895 || rhs_code != IMAGPART_EXPR)
896 /* Handle mismatches in plus/minus by computing both
897 and merging the results. */
898 && !((first_stmt_code == PLUS_EXPR
899 || first_stmt_code == MINUS_EXPR)
900 && (alt_stmt_code == PLUS_EXPR
901 || alt_stmt_code == MINUS_EXPR)
902 && rhs_code == alt_stmt_code)
903 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
904 && (first_stmt_code == ARRAY_REF
905 || first_stmt_code == BIT_FIELD_REF
906 || first_stmt_code == INDIRECT_REF
907 || first_stmt_code == COMPONENT_REF
908 || first_stmt_code == MEM_REF)))
909 {
910 if (dump_enabled_p ())
911 {
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
913 "Build SLP failed: different operation "
914 "in stmt %G", stmt);
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
916 "original stmt %G", first_stmt_info->stmt);
917 }
918 /* Mismatch. */
919 continue;
920 }
921
922 if (need_same_oprnds
923 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
924 {
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
927 "Build SLP failed: different shift "
928 "arguments in %G", stmt);
929 /* Mismatch. */
930 continue;
931 }
932
933 if (!load_p && rhs_code == CALL_EXPR)
934 {
935 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
936 as_a <gcall *> (stmt)))
937 {
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
940 "Build SLP failed: different calls in %G",
941 stmt);
942 /* Mismatch. */
943 continue;
944 }
945 }
946 }
947
948 /* Grouped store or load. */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
950 {
951 if (REFERENCE_CLASS_P (lhs))
952 {
953 /* Store. */
954 ;
955 }
956 else
957 {
958 /* Load. */
959 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
960 if (prev_first_load)
961 {
962 /* Check that there are no loads from different interleaving
963 chains in the same node. */
964 if (prev_first_load != first_load)
965 {
966 if (dump_enabled_p ())
967 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
968 vect_location,
969 "Build SLP failed: different "
970 "interleaving chains in one node %G",
971 stmt);
972 /* Mismatch. */
973 continue;
974 }
975 }
976 else
977 prev_first_load = first_load;
978 }
979 } /* Grouped access. */
980 else
981 {
982 if (load_p)
983 {
984 	      /* Non-grouped load.  */
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
987 "Build SLP failed: not grouped load %G", stmt);
988
989 	      /* FORNOW: Non-grouped loads are not supported.  */
990 /* Fatal mismatch. */
991 matches[0] = false;
992 return false;
993 }
994
995 	  /* Not a memory operation.  */
996 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
997 && TREE_CODE_CLASS (rhs_code) != tcc_unary
998 && TREE_CODE_CLASS (rhs_code) != tcc_expression
999 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1000 && rhs_code != VIEW_CONVERT_EXPR
1001 && rhs_code != CALL_EXPR)
1002 {
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1005 "Build SLP failed: operation unsupported %G",
1006 stmt);
1007 /* Fatal mismatch. */
1008 matches[0] = false;
1009 return false;
1010 }
1011
1012 if (rhs_code == COND_EXPR)
1013 {
1014 tree cond_expr = gimple_assign_rhs1 (stmt);
1015 enum tree_code cond_code = TREE_CODE (cond_expr);
1016 enum tree_code swap_code = ERROR_MARK;
1017 enum tree_code invert_code = ERROR_MARK;
1018
1019 if (i == 0)
1020 first_cond_code = TREE_CODE (cond_expr);
1021 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1022 {
1023 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1024 swap_code = swap_tree_comparison (cond_code);
1025 invert_code = invert_tree_comparison (cond_code, honor_nans);
1026 }
1027
1028 if (first_cond_code == cond_code)
1029 ;
1030 	      /* Isomorphism can be achieved by swapping.  */
1031 else if (first_cond_code == swap_code)
1032 swap[i] = 1;
1033 	      /* Isomorphism can be achieved by inverting.  */
1034 else if (first_cond_code == invert_code)
1035 swap[i] = 2;
1036 else
1037 {
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1040 "Build SLP failed: different"
1041 " operation %G", stmt);
1042 /* Mismatch. */
1043 continue;
1044 }
1045 }
1046 }
1047
1048 matches[i] = true;
1049 }
1050
1051 for (i = 0; i < group_size; ++i)
1052 if (!matches[i])
1053 return false;
1054
1055 /* If we allowed a two-operation SLP node verify the target can cope
1056 with the permute we are going to use. */
1057 if (alt_stmt_code != ERROR_MARK
1058 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1059 {
1060 if (vectype == boolean_type_node
1061 || !vect_two_operations_perm_ok_p (stmts, group_size,
1062 vectype, alt_stmt_code))
1063 {
1064 for (i = 0; i < group_size; ++i)
1065 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1066 {
1067 matches[i] = false;
1068 if (dump_enabled_p ())
1069 {
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1071 "Build SLP failed: different operation "
1072 "in stmt %G", stmts[i]->stmt);
1073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1074 "original stmt %G", first_stmt_info->stmt);
1075 }
1076 }
1077 return false;
1078 }
1079 *two_operators = true;
1080 }
1081
1082 return true;
1083 }
1084
1085 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1086    Note we never remove entries apart from at destruction time, so we do
1087    not need a special value for deleted that differs from empty.  */
1088 struct bst_traits
1089 {
1090 typedef vec <stmt_vec_info> value_type;
1091 typedef vec <stmt_vec_info> compare_type;
1092 static inline hashval_t hash (value_type);
1093 static inline bool equal (value_type existing, value_type candidate);
1094 static inline bool is_empty (value_type x) { return !x.exists (); }
1095 static inline bool is_deleted (value_type x) { return !x.exists (); }
1096 static inline void mark_empty (value_type &x) { x.release (); }
1097 static inline void mark_deleted (value_type &x) { x.release (); }
1098 static inline void remove (value_type &x) { x.release (); }
1099 };
1100 inline hashval_t
1101 bst_traits::hash (value_type x)
1102 {
1103 inchash::hash h;
1104 for (unsigned i = 0; i < x.length (); ++i)
1105 h.add_int (gimple_uid (x[i]->stmt));
1106 return h.end ();
1107 }
1108 inline bool
1109 bst_traits::equal (value_type existing, value_type candidate)
1110 {
1111 if (existing.length () != candidate.length ())
1112 return false;
1113 for (unsigned i = 0; i < existing.length (); ++i)
1114 if (existing[i] != candidate[i])
1115 return false;
1116 return true;
1117 }
1118
1119 typedef hash_map <vec <gimple *>, slp_tree,
1120 simple_hashmap_traits <bst_traits, slp_tree> >
1121 scalar_stmts_to_slp_tree_map_t;
1122
1123 static slp_tree
1124 vect_build_slp_tree_2 (vec_info *vinfo,
1125 vec<stmt_vec_info> stmts, unsigned int group_size,
1126 poly_uint64 *max_nunits,
1127 bool *matches, unsigned *npermutes, unsigned *tree_size,
1128 scalar_stmts_to_slp_tree_map_t *bst_map);
1129
1130 static slp_tree
1131 vect_build_slp_tree (vec_info *vinfo,
1132 vec<stmt_vec_info> stmts, unsigned int group_size,
1133 poly_uint64 *max_nunits,
1134 bool *matches, unsigned *npermutes, unsigned *tree_size,
1135 scalar_stmts_to_slp_tree_map_t *bst_map)
1136 {
1137 if (slp_tree *leader = bst_map->get (stmts))
1138 {
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1141 *leader ? "" : "failed ", *leader);
1142 if (*leader)
1143 {
1144 (*leader)->refcnt++;
1145 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1146 }
1147 return *leader;
1148 }
1149 poly_uint64 this_max_nunits = 1;
1150   slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
1151 					&this_max_nunits, matches, npermutes, tree_size, bst_map);
1152 if (res)
1153 {
1154 res->max_nunits = this_max_nunits;
1155 vect_update_max_nunits (max_nunits, this_max_nunits);
1156 /* Keep a reference for the bst_map use. */
1157 res->refcnt++;
1158 }
1159 bst_map->put (stmts.copy (), res);
1160 return res;
1161 }
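
/* For example, when the same vector of scalar stmts appears as the
   operand of two different SLP nodes, the bst_map lookup above re-uses
   the tree (or the recorded failure) from the first build instead of
   recursing again; refcnt then counts the SLP parents plus the one
   reference held by bst_map itself.  */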
1162
1163 /* Recursively build an SLP tree starting from NODE.
1164    Fail (and return NULL) if def-stmts are not
1165    isomorphic, require data permutation or are of unsupported types of
1166    operation.  Otherwise, return the built SLP tree.  *MATCHES records
1167    which stmts of the group matched; on failure it indicates where in
1168    the group the mismatch was found.  */
1169
1170 static slp_tree
1171 vect_build_slp_tree_2 (vec_info *vinfo,
1172 vec<stmt_vec_info> stmts, unsigned int group_size,
1173 poly_uint64 *max_nunits,
1174 bool *matches, unsigned *npermutes, unsigned *tree_size,
1175 scalar_stmts_to_slp_tree_map_t *bst_map)
1176 {
1177 unsigned nops, i, this_tree_size = 0;
1178 poly_uint64 this_max_nunits = *max_nunits;
1179 slp_tree node;
1180
1181 matches[0] = false;
1182
1183 stmt_vec_info stmt_info = stmts[0];
1184 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1185 nops = gimple_call_num_args (stmt);
1186 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1187 {
1188 nops = gimple_num_ops (stmt) - 1;
1189 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1190 nops++;
1191 }
1192 else if (is_a <gphi *> (stmt_info->stmt))
1193 nops = 0;
1194 else
1195 return NULL;
1196
1197 /* If the SLP node is a PHI (induction or reduction), terminate
1198 the recursion. */
1199 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1200 {
1201 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1202 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
1203 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1204 return NULL;
1205
1206 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1207 /* Induction from different IVs is not supported. */
1208 if (def_type == vect_induction_def)
1209 {
1210 stmt_vec_info other_info;
1211 FOR_EACH_VEC_ELT (stmts, i, other_info)
1212 if (stmt_info != other_info)
1213 return NULL;
1214 }
1215 else if (def_type == vect_reduction_def
1216 || def_type == vect_double_reduction_def
1217 || def_type == vect_nested_cycle)
1218 {
1219 /* Else def types have to match. */
1220 stmt_vec_info other_info;
1221 FOR_EACH_VEC_ELT (stmts, i, other_info)
1222 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1223 return NULL;
1224 }
1225 else
1226 return NULL;
1227 (*tree_size)++;
1228 node = vect_create_new_slp_node (stmts);
1229 return node;
1230 }
1231
1232
1233 bool two_operators = false;
1234 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1235 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1236 &this_max_nunits, matches, &two_operators))
1237 return NULL;
1238
1239 /* If the SLP node is a load, terminate the recursion unless masked. */
1240 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1241 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1242 {
1243 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1244 {
1245 /* Masked load. */
1246 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1247 nops = 1;
1248 }
1249 else
1250 {
1251 *max_nunits = this_max_nunits;
1252 (*tree_size)++;
1253 node = vect_create_new_slp_node (stmts);
1254 return node;
1255 }
1256 }
1257
1258 /* Get at the operands, verifying they are compatible. */
1259 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1260 slp_oprnd_info oprnd_info;
1261 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1262 {
1263 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1264 stmts, i, &oprnds_info);
1265 if (res != 0)
1266 matches[(res == -1) ? 0 : i] = false;
1267 if (!matches[0])
1268 break;
1269 }
1270 for (i = 0; i < group_size; ++i)
1271 if (!matches[i])
1272 {
1273 vect_free_oprnd_info (oprnds_info);
1274 return NULL;
1275 }
1276
1277 auto_vec<slp_tree, 4> children;
1278
1279 stmt_info = stmts[0];
1280
1281 /* Create SLP_TREE nodes for the definition node/s. */
1282 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1283 {
1284 slp_tree child;
1285 unsigned old_tree_size = this_tree_size;
1286 unsigned int j;
1287
1288 if (oprnd_info->first_dt == vect_uninitialized_def)
1289 {
1290 	  /* A COND_EXPR has one operand too many here when the condition
1291 	     is an SSA name.  */
1292 gcc_assert (i == 3 && nops == 4);
1293 continue;
1294 }
1295
1296 if (oprnd_info->first_dt != vect_internal_def
1297 && oprnd_info->first_dt != vect_reduction_def
1298 && oprnd_info->first_dt != vect_induction_def)
1299 {
1300 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1301 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1302 oprnd_info->ops = vNULL;
1303 children.safe_push (invnode);
1304 continue;
1305 }
1306
1307 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1308 group_size, &this_max_nunits,
1309 matches, npermutes,
1310 &this_tree_size, bst_map)) != NULL)
1311 {
1312 	  /* If all children of this child are built up from scalars then just
1313 	     throw that away and build this node up from scalars instead.  */
1314 if (is_a <bb_vec_info> (vinfo)
1315 && !SLP_TREE_CHILDREN (child).is_empty ()
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1318 scalar version. */
1319 && !oprnd_info->any_pattern)
1320 {
1321 slp_tree grandchild;
1322
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1324 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1325 break;
1326 if (!grandchild)
1327 {
1328 /* Roll back. */
1329 this_tree_size = old_tree_size;
1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1331 vect_free_slp_tree (grandchild, false);
1332 SLP_TREE_CHILDREN (child).truncate (0);
1333
1334 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE, vect_location,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
1338 oprnd_info->def_stmts = vNULL;
1339 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1340 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1341 oprnd_info->ops = vNULL;
1342 ++this_tree_size;
1343 children.safe_push (child);
1344 continue;
1345 }
1346 }
1347
1348 oprnd_info->def_stmts = vNULL;
1349 children.safe_push (child);
1350 continue;
1351 }
1352
1353       /* If the SLP build failed fatally and we are analyzing a basic
1354          block, simply treat nodes we fail to build as externally defined
1355          (and thus build vectors from the scalar defs).
1356          The cost model will reject outright expensive cases.
1357          ??? This doesn't treat cases where permutation ultimately
1358          fails (or we don't try permutation below).  Ideally we'd
1359 even compute a permutation that will end up with the maximum
1360 SLP tree size... */
1361 if (is_a <bb_vec_info> (vinfo)
1362 && !matches[0]
1363 /* ??? Rejecting patterns this way doesn't work. We'd have to
1364 do extra work to cancel the pattern so the uses see the
1365 scalar version. */
1366 && !is_pattern_stmt_p (stmt_info)
1367 && !oprnd_info->any_pattern)
1368 {
1369 if (dump_enabled_p ())
1370 dump_printf_loc (MSG_NOTE, vect_location,
1371 "Building vector operands from scalars\n");
1372 this_tree_size++;
1373 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1374 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1375 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1376 children.safe_push (child);
1377 oprnd_info->ops = vNULL;
1378 oprnd_info->def_stmts = vNULL;
1379 continue;
1380 }
1381
1382       /* If the SLP build for operand zero failed and operands zero
1383          and one can be commuted, try that for the scalar stmts
1384          that failed the match.  */
1385 if (i == 0
1386 /* A first scalar stmt mismatch signals a fatal mismatch. */
1387 && matches[0]
1388 /* ??? For COND_EXPRs we can swap the comparison operands
1389 as well as the arms under some constraints. */
1390 && nops == 2
1391 && oprnds_info[1]->first_dt == vect_internal_def
1392 && is_gimple_assign (stmt_info->stmt)
1393 /* Swapping operands for reductions breaks assumptions later on. */
1394 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1395 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
1396 	  /* Do so only if the number of unsuccessful permutes was no more
1397 	     than a cut-off, as re-trying the recursive match on
1398 possibly each level of the tree would expose exponential
1399 behavior. */
1400 && *npermutes < 4)
1401 {
1402 /* See whether we can swap the matching or the non-matching
1403 stmt operands. */
1404 bool swap_not_matching = true;
1405 do
1406 {
1407 for (j = 0; j < group_size; ++j)
1408 {
1409 if (matches[j] != !swap_not_matching)
1410 continue;
1411 stmt_vec_info stmt_info = stmts[j];
1412 /* Verify if we can swap operands of this stmt. */
1413 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1414 if (!stmt
1415 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1416 {
1417 if (!swap_not_matching)
1418 goto fail;
1419 swap_not_matching = false;
1420 break;
1421 }
1422 }
1423 }
1424 while (j != group_size);
1425
1426 /* Swap mismatched definition stmts. */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location,
1429 "Re-trying with swapped operands of stmts ");
1430 for (j = 0; j < group_size; ++j)
1431 if (matches[j] == !swap_not_matching)
1432 {
1433 std::swap (oprnds_info[0]->def_stmts[j],
1434 oprnds_info[1]->def_stmts[j]);
1435 std::swap (oprnds_info[0]->ops[j],
1436 oprnds_info[1]->ops[j]);
1437 if (dump_enabled_p ())
1438 dump_printf (MSG_NOTE, "%d ", j);
1439 }
1440 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE, "\n");
1442 /* And try again with scratch 'matches' ... */
1443 bool *tem = XALLOCAVEC (bool, group_size);
1444 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1445 group_size, &this_max_nunits,
1446 tem, npermutes,
1447 &this_tree_size, bst_map)) != NULL)
1448 {
1449 	      /* If all children of this child are built up from scalars then
1450 	         just throw that away and build this node up from scalars instead.  */
1451 if (is_a <bb_vec_info> (vinfo)
1452 && !SLP_TREE_CHILDREN (child).is_empty ()
1453 /* ??? Rejecting patterns this way doesn't work. We'd have
1454 to do extra work to cancel the pattern so the uses see the
1455 scalar version. */
1456 && !oprnd_info->any_pattern)
1457 {
1458 unsigned int j;
1459 slp_tree grandchild;
1460
1461 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1462 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1463 break;
1464 if (!grandchild)
1465 {
1466 /* Roll back. */
1467 this_tree_size = old_tree_size;
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1469 vect_free_slp_tree (grandchild, false);
1470 SLP_TREE_CHILDREN (child).truncate (0);
1471
1472 if (dump_enabled_p ())
1473 dump_printf_loc (MSG_NOTE, vect_location,
1474 "Building parent vector operands from "
1475 "scalars instead\n");
1476 oprnd_info->def_stmts = vNULL;
1477 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1478 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1479 oprnd_info->ops = vNULL;
1480 ++this_tree_size;
1481 children.safe_push (child);
1482 continue;
1483 }
1484 }
1485
1486 oprnd_info->def_stmts = vNULL;
1487 children.safe_push (child);
1488 continue;
1489 }
1490
1491 ++*npermutes;
1492 }
1493
1494 fail:
1495 gcc_assert (child == NULL);
1496 FOR_EACH_VEC_ELT (children, j, child)
1497 vect_free_slp_tree (child, false);
1498 vect_free_oprnd_info (oprnds_info);
1499 return NULL;
1500 }
1501
1502 vect_free_oprnd_info (oprnds_info);
1503
1504 *tree_size += this_tree_size + 1;
1505 *max_nunits = this_max_nunits;
1506
1507 node = vect_create_new_slp_node (stmts);
1508 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1509 SLP_TREE_CHILDREN (node).splice (children);
1510 return node;
1511 }
1512
1513 /* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */
1514
1515 static void
1516 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1517 slp_tree node, hash_set<slp_tree> &visited)
1518 {
1519 unsigned i;
1520 stmt_vec_info stmt_info;
1521 slp_tree child;
1522 tree op;
1523
1524 if (visited.add (node))
1525 return;
1526
1527 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1528 dump_user_location_t user_loc = loc.get_user_location ();
1529 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
1530 SLP_TREE_DEF_TYPE (node) == vect_external_def
1531 ? " (external)"
1532 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1533 ? " (constant)"
1534 : ""), node,
1535 estimated_poly_value (node->max_nunits));
1536 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1538 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1539 else
1540 {
1541 dump_printf_loc (metadata, user_loc, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1543 dump_printf (metadata, "%T%s ", op,
1544 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1545 dump_printf (metadata, "}\n");
1546 }
1547 if (SLP_TREE_CHILDREN (node).is_empty ())
1548 return;
1549 dump_printf_loc (metadata, user_loc, "\tchildren");
1550 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1551 dump_printf (dump_kind, " %p", (void *)child);
1552 dump_printf (dump_kind, "\n");
1553 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1554 vect_print_slp_tree (dump_kind, loc, child, visited);
1555 }
1556
1557 static void
1558 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1559 slp_tree node)
1560 {
1561 hash_set<slp_tree> visited;
1562 vect_print_slp_tree (dump_kind, loc, node, visited);
1563 }
1564
1565 /* Mark the tree rooted at NODE with PURE_SLP. */
1566
1567 static void
1568 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1569 {
1570 int i;
1571 stmt_vec_info stmt_info;
1572 slp_tree child;
1573
1574 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1575 return;
1576
1577 if (visited.add (node))
1578 return;
1579
1580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1581 STMT_SLP_TYPE (stmt_info) = pure_slp;
1582
1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1584 vect_mark_slp_stmts (child, visited);
1585 }
1586
1587 static void
1588 vect_mark_slp_stmts (slp_tree node)
1589 {
1590 hash_set<slp_tree> visited;
1591 vect_mark_slp_stmts (node, visited);
1592 }
1593
1594 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1595
1596 static void
1597 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1598 {
1599 int i;
1600 stmt_vec_info stmt_info;
1601 slp_tree child;
1602
1603 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1604 return;
1605
1606 if (visited.add (node))
1607 return;
1608
1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1610 {
1611 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1612 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1613 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1614 }
1615
1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1617 vect_mark_slp_stmts_relevant (child, visited);
1618 }
1619
1620 static void
1621 vect_mark_slp_stmts_relevant (slp_tree node)
1622 {
1623 hash_set<slp_tree> visited;
1624 vect_mark_slp_stmts_relevant (node, visited);
1625 }
1626
1627
1628 /* Rearrange the statements of NODE according to PERMUTATION. */
1629
1630 static void
1631 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1632 vec<unsigned> permutation,
1633 hash_set<slp_tree> &visited)
1634 {
1635 unsigned int i;
1636 slp_tree child;
1637
1638 if (visited.add (node))
1639 return;
1640
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1642 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1643
1644 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1645 {
1646 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1647 vec<stmt_vec_info> tmp_stmts;
1648 tmp_stmts.create (group_size);
1649 tmp_stmts.quick_grow (group_size);
1650 stmt_vec_info stmt_info;
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1652 tmp_stmts[permutation[i]] = stmt_info;
1653 SLP_TREE_SCALAR_STMTS (node).release ();
1654 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1655 }
1656 if (SLP_TREE_SCALAR_OPS (node).exists ())
1657 {
1658 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1659 vec<tree> tmp_ops;
1660 tmp_ops.create (group_size);
1661 tmp_ops.quick_grow (group_size);
1662 tree op;
1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1664 tmp_ops[permutation[i]] = op;
1665 SLP_TREE_SCALAR_OPS (node).release ();
1666 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1667 }
1668 }
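
/* For example, with PERMUTATION = { 1, 2, 0 } the stmt (or op) at
   index 0 moves to slot 1, index 1 to slot 2 and index 2 to slot 0,
   following tmp[permutation[i]] = original[i] above.  */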
1669
1670
1671 /* Attempt to reorder stmts in a reduction chain so that we don't
1672 require any load permutation. Return true if that was possible,
1673 otherwise return false. */
1674
1675 static bool
1676 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1677 {
1678 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1679 unsigned int i, j;
1680 unsigned int lidx;
1681 slp_tree node, load;
1682
1683 /* Compare all the permutation sequences to the first one. We know
1684 that at least one load is permuted. */
1685 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1686 if (!node->load_permutation.exists ())
1687 return false;
1688 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1689 {
1690 if (!load->load_permutation.exists ())
1691 return false;
1692 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1693 if (lidx != node->load_permutation[j])
1694 return false;
1695 }
1696
1697 /* Check that the loads in the first sequence are different and there
1698 are no gaps between them. */
1699 auto_sbitmap load_index (group_size);
1700 bitmap_clear (load_index);
1701 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1702 {
1703 if (lidx >= group_size)
1704 return false;
1705 if (bitmap_bit_p (load_index, lidx))
1706 return false;
1707
1708 bitmap_set_bit (load_index, lidx);
1709 }
1710 for (i = 0; i < group_size; i++)
1711 if (!bitmap_bit_p (load_index, i))
1712 return false;
1713
1714 /* This permutation is valid for reduction. Since the order of the
1715 statements in the nodes is not important unless they are memory
1716 accesses, we can rearrange the statements in all the nodes
1717 according to the order of the loads. */
1718 hash_set<slp_tree> visited;
1719 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1720 node->load_permutation, visited);
1721
1722 /* We are done, no actual permutations need to be generated. */
1723 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1724 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1725 {
1726 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1727 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1728 /* But we have to keep those permutations that are required because
1729 of handling of gaps. */
1730 if (known_eq (unrolling_factor, 1U)
1731 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1732 && DR_GROUP_GAP (first_stmt_info) == 0))
1733 SLP_TREE_LOAD_PERMUTATION (node).release ();
1734 else
1735 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1736 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1737 }
1738
1739 return true;
1740 }
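
/* For example, if the loads of a reduction SLP instance all use the
   permutation { 1, 0 }, the scalar stmts of every node are re-ordered
   with that permutation, after which the loads are in-order and their
   load permutations can be dropped (except where the handling of gaps
   still requires one).  */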
1741
1742 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1743
1744 static void
1745 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1746 hash_set<slp_tree> &visited)
1747 {
1748 if (visited.add (node))
1749 return;
1750
1751 if (SLP_TREE_CHILDREN (node).length () == 0)
1752 {
1753 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1754 return;
1755 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1756 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1757 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1758 SLP_INSTANCE_LOADS (inst).safe_push (node);
1759 }
1760 else
1761 {
1762 unsigned i;
1763 slp_tree child;
1764 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1765 vect_gather_slp_loads (inst, child, visited);
1766 }
1767 }
1768
1769 static void
1770 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1771 {
1772 hash_set<slp_tree> visited;
1773 vect_gather_slp_loads (inst, node, visited);
1774 }
1775
1776 /* Check if the required load permutations in the SLP instance
1777 SLP_INSTN are supported. */
1778
1779 static bool
1780 vect_supported_load_permutation_p (slp_instance slp_instn)
1781 {
1782 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1783 unsigned int i, j, k, next;
1784 slp_tree node;
1785
1786 if (dump_enabled_p ())
1787 {
1788 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1789 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1790 if (node->load_permutation.exists ())
1791 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1792 dump_printf (MSG_NOTE, "%d ", next);
1793 else
1794 for (k = 0; k < group_size; ++k)
1795 dump_printf (MSG_NOTE, "%d ", k);
1796 dump_printf (MSG_NOTE, "\n");
1797 }
1798
1799 /* In case of reduction every load permutation is allowed, since the order
1800 of the reduction statements is not important (as opposed to the case of
1801 grouped stores). The only condition we need to check is that all the
1802 load nodes are of the same size and have the same permutation (and then
1803 rearrange all the nodes of the SLP instance according to this
1804 permutation). */
1805
1806 /* Check that all the load nodes are of the same size. */
1807 /* ??? Can't we assert this? */
1808 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1809 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1810 return false;
1811
1812 node = SLP_INSTANCE_TREE (slp_instn);
1813 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1814
1815 /* Reduction (there are no data-refs in the root).
1816 In a reduction chain the order of the loads is not important. */
1817 if (!STMT_VINFO_DATA_REF (stmt_info)
1818 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1819 vect_attempt_slp_rearrange_stmts (slp_instn);
1820
1821 /* In basic block vectorization we allow any subchain of an interleaving
1822 chain.
1823 FORNOW: not supported in loop SLP because of realignment complications. */
1824 if (STMT_VINFO_BB_VINFO (stmt_info))
1825 {
1826 /* Check whether the loads in an instance form a subchain and thus
1827 no permutation is necessary. */
1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1829 {
1830 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1831 continue;
1832 bool subchain_p = true;
1833 stmt_vec_info next_load_info = NULL;
1834 stmt_vec_info load_info;
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1836 {
1837 if (j != 0
1838 && (next_load_info != load_info
1839 || DR_GROUP_GAP (load_info) != 1))
1840 {
1841 subchain_p = false;
1842 break;
1843 }
1844 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1845 }
1846 if (subchain_p)
1847 SLP_TREE_LOAD_PERMUTATION (node).release ();
1848 else
1849 {
1850 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1851 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1852 unsigned HOST_WIDE_INT nunits;
1853 unsigned k, maxk = 0;
1854 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1855 if (k > maxk)
1856 maxk = k;
1857 /* In BB vectorization we may not actually use a loaded vector
1858 accessing elements in excess of DR_GROUP_SIZE. */
1859 tree vectype = STMT_VINFO_VECTYPE (group_info);
1860 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1861 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1862 {
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1865 "BB vectorization with gaps at the end of "
1866 "a load is not supported\n");
1867 return false;
1868 }
1869
1870 /* Verify the permutation can be generated. */
1871 vec<tree> tem;
1872 unsigned n_perms;
1873 if (!vect_transform_slp_perm_load (node, tem, NULL,
1874 1, slp_instn, true, &n_perms))
1875 {
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1878 vect_location,
1879 "unsupported load permutation\n");
1880 return false;
1881 }
1882 }
1883 }
1884 return true;
1885 }
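   /* As an illustration of the subchain test above: given an interleaving
      chain a[0], a[1], a[2], a[3], an SLP node loading the consecutive
      subsequence {a[2], a[3]} forms a subchain and its load permutation is
      dropped, while a node loading {a[3], a[1]} does not and must be
      verified by vect_transform_slp_perm_load.  */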
1886
1887 /* For loop vectorization verify we can generate the permutation. Be
1888 conservative about the vectorization factor: there are permutations
1889 that will use three vector inputs only starting from a specific factor,
1890 and the vectorization factor is not yet final.
1891 ??? The SLP instance unrolling factor might not be the maximum one. */
1892 unsigned n_perms;
1893 poly_uint64 test_vf
1894 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1895 LOOP_VINFO_VECT_FACTOR
1896 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1897 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1898 if (node->load_permutation.exists ()
1899 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1900 slp_instn, true, &n_perms))
1901 return false;
1902
1903 return true;
1904 }
1905
1906
1907 /* Find the last scalar statement in the SLP tree rooted at NODE. */
1908
1909 stmt_vec_info
1910 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1911 {
1912 stmt_vec_info last = NULL;
1913 stmt_vec_info stmt_vinfo;
1914
1915 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1916 {
1917 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1918 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1919 }
1920
1921 return last;
1922 }
1923
1924 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1925 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1926 (also containing the first GROUP1_SIZE stmts, since stores are
1927 consecutive), the second containing the remainder.
1928 Return the first stmt in the second group. */
1929
1930 static stmt_vec_info
1931 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1932 {
1933 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1934 gcc_assert (group1_size > 0);
1935 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1936 gcc_assert (group2_size > 0);
1937 DR_GROUP_SIZE (first_vinfo) = group1_size;
1938
1939 stmt_vec_info stmt_info = first_vinfo;
1940 for (unsigned i = group1_size; i > 1; i--)
1941 {
1942 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1943 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1944 }
1945 /* STMT is now the last element of the first group. */
1946 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1947 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1948
1949 DR_GROUP_SIZE (group2) = group2_size;
1950 for (stmt_info = group2; stmt_info;
1951 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1952 {
1953 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1954 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1955 }
1956
1957 /* For the second group, the DR_GROUP_GAP is that before the original group,
1958 plus skipping over the first group. */
1959 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1960
1961 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1962 DR_GROUP_GAP (first_vinfo) += group2_size;
1963
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1966 group1_size, group2_size);
1967
1968 return group2;
1969 }
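/* For example, splitting a store group of DR_GROUP_SIZE 7 at GROUP1_SIZE 4
   produces a first group of size 4 whose DR_GROUP_GAP is increased by the
   3 elements of the second group, and a second group of size 3 whose
   DR_GROUP_GAP additionally skips the 4 elements of the first group, so
   both groups still describe the original element positions.  */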
1970
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1973
1974 static poly_uint64
1975 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1976 {
1977 return exact_div (common_multiple (nunits, group_size), group_size);
1978 }
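/* For example, with NUNITS == 4 and GROUP_SIZE == 6,
   common_multiple (4, 6) == 12 and the unrolling factor is 12 / 6 == 2:
   two copies of the group fill three vectors exactly.  With
   GROUP_SIZE == 8 the common multiple is 8 and the factor is 1.  */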
1979
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1983
1984 static bool
1985 vect_analyze_slp_instance (vec_info *vinfo,
1986 stmt_vec_info stmt_info, unsigned max_tree_size)
1987 {
1988 slp_instance new_instance;
1989 slp_tree node;
1990 unsigned int group_size;
1991 tree vectype, scalar_type = NULL_TREE;
1992 unsigned int i;
1993 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1994 vec<stmt_vec_info> scalar_stmts;
1995 bool constructor = false;
1996
1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1998 {
1999 scalar_type = TREE_TYPE (DR_REF (dr));
2000 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
2001 group_size = DR_GROUP_SIZE (stmt_info);
2002 }
2003 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2004 {
2005 gcc_assert (is_a <loop_vec_info> (vinfo));
2006 vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 group_size = REDUC_GROUP_SIZE (stmt_info);
2008 }
2009 else if (is_gimple_assign (stmt_info->stmt)
2010 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2011 {
2012 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2013 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2014 constructor = true;
2015 }
2016 else
2017 {
2018 gcc_assert (is_a <loop_vec_info> (vinfo));
2019 vectype = STMT_VINFO_VECTYPE (stmt_info);
2020 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2021 }
2022
2023 if (!vectype)
2024 {
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2027 "Build SLP failed: unsupported data-type %T\n",
2028 scalar_type);
2029
2030 return false;
2031 }
2032 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2033
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts.create (group_size);
2036 stmt_vec_info next_info = stmt_info;
2037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2038 {
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2040 while (next_info)
2041 {
2042 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2043 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2044 }
2045 }
2046 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2047 {
2048 /* Collect the reduction stmts and store them in
2049 SLP_TREE_SCALAR_STMTS. */
2050 while (next_info)
2051 {
2052 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2053 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2054 }
2055 /* Mark the first element of the reduction chain as reduction to properly
2056 transform the node. In the reduction analysis phase only the last
2057 element of the chain is marked as reduction. */
2058 STMT_VINFO_DEF_TYPE (stmt_info)
2059 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2060 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2062 }
2063 else if (constructor)
2064 {
2065 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2066 tree val;
2067 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2068 {
2069 if (TREE_CODE (val) == SSA_NAME)
2070 {
2071 gimple* def = SSA_NAME_DEF_STMT (val);
2072 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2073 /* Value is defined in another basic block. */
2074 if (!def_info)
2075 return false;
2076 scalar_stmts.safe_push (def_info);
2077 }
2078 else
2079 return false;
2080 }
2081 }
2082 else
2083 {
2084 /* Collect reduction statements. */
2085 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2086 for (i = 0; reductions.iterate (i, &next_info); i++)
2087 scalar_stmts.safe_push (next_info);
2088 }
2089
2090 /* Build the tree for the SLP instance. */
2091 bool *matches = XALLOCAVEC (bool, group_size);
2092 unsigned npermutes = 0;
2093 scalar_stmts_to_slp_tree_map_t *bst_map
2094 = new scalar_stmts_to_slp_tree_map_t ();
2095 poly_uint64 max_nunits = nunits;
2096 unsigned tree_size = 0;
2097 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2098 &max_nunits, matches, &npermutes,
2099 &tree_size, bst_map);
2100 /* The map keeps a reference on SLP nodes built, release that. */
2101 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2102 it != bst_map->end (); ++it)
2103 if ((*it).second)
2104 vect_free_slp_tree ((*it).second, false);
2105 delete bst_map;
2106 if (node != NULL)
2107 {
2108 /* If this is a reduction chain with a conversion in front
2109 amend the SLP tree with a node for that. */
2110 if (!dr
2111 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2112 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2113 {
2114 /* Get at the conversion stmt - we know it's the single use
2115 of the last stmt of the reduction chain. */
2116 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2117 use_operand_p use_p;
2118 gimple *use_stmt;
2119 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2120 gcc_assert (r);
2121 next_info = vinfo->lookup_stmt (use_stmt);
2122 next_info = vect_stmt_to_vectorize (next_info);
2123 scalar_stmts = vNULL;
2124 scalar_stmts.create (group_size);
2125 for (unsigned i = 0; i < group_size; ++i)
2126 scalar_stmts.quick_push (next_info);
2127 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2128 SLP_TREE_CHILDREN (conv).quick_push (node);
2129 node = conv;
2130 /* We also have to fake this conversion stmt as an SLP reduction group
2131 so we don't have to mess with too much code elsewhere. */
2132 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2133 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2134 }
2135
2136 /* Calculate the unrolling factor based on the smallest type. */
2137 poly_uint64 unrolling_factor
2138 = calculate_unrolling_factor (max_nunits, group_size);
2139
2140 if (maybe_ne (unrolling_factor, 1U)
2141 && is_a <bb_vec_info> (vinfo))
2142 {
2143 unsigned HOST_WIDE_INT const_max_nunits;
2144 if (!max_nunits.is_constant (&const_max_nunits)
2145 || const_max_nunits > group_size)
2146 {
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
2152 vect_free_slp_tree (node, false);
2153 return false;
2154 }
2155 /* Fatal mismatch. */
2156 matches[group_size / const_max_nunits * const_max_nunits] = false;
2157 vect_free_slp_tree (node, false);
2158 }
2159 else
2160 {
2161 /* Create a new SLP instance. */
2162 new_instance = XNEW (class _slp_instance);
2163 SLP_INSTANCE_TREE (new_instance) = node;
2164 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2166 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2167 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2168
2169 vect_gather_slp_loads (new_instance, node);
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE, vect_location,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size, max_tree_size);
2174
2175 /* Compute the load permutation. */
2176 slp_tree load_node;
2177 bool loads_permuted = false;
2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2179 {
2180 vec<unsigned> load_permutation;
2181 int j;
2182 stmt_vec_info load_info;
2183 bool this_load_permuted = false;
2184 load_permutation.create (group_size);
2185 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2186 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2188 {
2189 int load_place = vect_get_place_in_interleaving_chain
2190 (load_info, first_stmt_info);
2191 gcc_assert (load_place != -1);
2192 if (load_place != j)
2193 this_load_permuted = true;
2194 load_permutation.safe_push (load_place);
2195 }
2196 if (!this_load_permuted
2197 /* The load requires permutation when unrolling exposes
2198 a gap either because the group is larger than the SLP
2199 group-size or because there is a gap between the groups. */
2200 && (known_eq (unrolling_factor, 1U)
2201 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2202 && DR_GROUP_GAP (first_stmt_info) == 0)))
2203 {
2204 load_permutation.release ();
2205 continue;
2206 }
2207 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2208 loads_permuted = true;
2209 }
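	   /* For example, an SLP node whose scalar loads are {a[1], a[0]}
	      from an interleaving chain starting at a[0] gets the load
	      permutation {1, 0} and sets LOADS_PERMUTED; an in-order node
	      {a[0], a[1]} with no gap exposed by unrolling releases its
	      permutation instead.  */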
2210
2211 if (loads_permuted)
2212 {
2213 if (!vect_supported_load_permutation_p (new_instance))
2214 {
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2217 "Build SLP failed: unsupported load "
2218 "permutation %G", stmt_info->stmt);
2219 vect_free_slp_instance (new_instance, false);
2220 return false;
2221 }
2222 }
2223
2224 /* If the loads and stores can be handled with load/store-lanes
2225 instructions, do not generate this SLP instance. */
2226 if (is_a <loop_vec_info> (vinfo)
2227 && loads_permuted
2228 && dr && vect_store_lanes_supported (vectype, group_size, false))
2229 {
2230 slp_tree load_node;
2231 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2232 {
2233 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2234 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2235 /* Use SLP for strided accesses (or if we can't load-lanes). */
2236 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2237 || ! vect_load_lanes_supported
2238 (STMT_VINFO_VECTYPE (stmt_vinfo),
2239 DR_GROUP_SIZE (stmt_vinfo), false))
2240 break;
2241 }
2242 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Built SLP cancelled: can use "
2247 "load/store-lanes\n");
2248 vect_free_slp_instance (new_instance, false);
2249 return false;
2250 }
2251 }
2252
2253 vinfo->slp_instances.safe_push (new_instance);
2254
2255 if (dump_enabled_p ())
2256 {
2257 dump_printf_loc (MSG_NOTE, vect_location,
2258 "Final SLP tree for instance:\n");
2259 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2260 }
2261
2262 return true;
2263 }
2264 }
2265 else
2266 {
2267 /* Failed to SLP. */
2268 /* Free the allocated memory. */
2269 scalar_stmts.release ();
2270 }
2271
2272 /* For basic block SLP, try to break the group up into multiples of the
2273 vector size. */
2274 unsigned HOST_WIDE_INT const_nunits;
2275 if (is_a <bb_vec_info> (vinfo)
2276 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2277 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2278 && nunits.is_constant (&const_nunits))
2279 {
2280 /* We consider breaking the group only on VF boundaries from the existing
2281 start. */
2282 for (i = 0; i < group_size; i++)
2283 if (!matches[i]) break;
2284
2285 if (i >= const_nunits && i < group_size)
2286 {
2287 /* Split into two groups at the first vector boundary before i. */
2288 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2289 unsigned group1_size = i & ~(const_nunits - 1);
2290
2291 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2292 group1_size);
2293 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2294 max_tree_size);
2295 /* If the first non-match was in the middle of a vector,
2296 skip the rest of that vector. */
2297 if (group1_size < i)
2298 {
2299 i = group1_size + const_nunits;
2300 if (i < group_size)
2301 rest = vect_split_slp_store_group (rest, const_nunits);
2302 }
2303 if (i < group_size)
2304 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2305 return res;
2306 }
2307 /* Even though the first vector did not all match, we might be able to SLP
2308 (some) of the remainder. FORNOW ignore this possibility. */
2309 }
2310
2311 return false;
2312 }
2313
2314
2315 /* Check if there are stmts in the loop that can be vectorized using SLP.
2316 Build SLP trees of packed scalar stmts if SLP is possible. */
2317
2318 opt_result
2319 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2320 {
2321 unsigned int i;
2322 stmt_vec_info first_element;
2323
2324 DUMP_VECT_SCOPE ("vect_analyze_slp");
2325
2326 /* Find SLP sequences starting from groups of grouped stores. */
2327 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2328 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2329
2330 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2331 {
2332 if (loop_vinfo->reduction_chains.length () > 0)
2333 {
2334 /* Find SLP sequences starting from reduction chains. */
2335 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2336 if (! vect_analyze_slp_instance (vinfo, first_element,
2337 max_tree_size))
2338 {
2339 /* Dissolve reduction chain group. */
2340 stmt_vec_info vinfo = first_element;
2341 stmt_vec_info last = NULL;
2342 while (vinfo)
2343 {
2344 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2345 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2346 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2347 last = vinfo;
2348 vinfo = next;
2349 }
2350 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2351 /* It can be still vectorized as part of an SLP reduction. */
2352 loop_vinfo->reductions.safe_push (last);
2353 }
2354 }
2355
2356 /* Find SLP sequences starting from groups of reductions. */
2357 if (loop_vinfo->reductions.length () > 1)
2358 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2359 max_tree_size);
2360 }
2361
2362 return opt_result::success ();
2363 }
2364
2365
2366 /* For each possible SLP instance decide whether to SLP it and calculate the
2367 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2368 to SLP at least one instance. */
2369
2370 bool
2371 vect_make_slp_decision (loop_vec_info loop_vinfo)
2372 {
2373 unsigned int i;
2374 poly_uint64 unrolling_factor = 1;
2375 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2376 slp_instance instance;
2377 int decided_to_slp = 0;
2378
2379 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2380
2381 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2382 {
2383 /* FORNOW: SLP if you can. */
2384 /* All unroll factors have the form vinfo->vector_size * X for some
2385 rational X, so they must have a common multiple. */
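      /* For example, instances with unrolling factors 2 and 4 combine to
	 an overall factor of 4 via force_common_multiple.  */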
2386 unrolling_factor
2387 = force_common_multiple (unrolling_factor,
2388 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2389
2390 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2391 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2392 loop-based vectorization. Such stmts will be marked as HYBRID. */
2393 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2394 decided_to_slp++;
2395 }
2396
2397 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2398
2399 if (decided_to_slp && dump_enabled_p ())
2400 {
2401 dump_printf_loc (MSG_NOTE, vect_location,
2402 "Decided to SLP %d instances. Unrolling factor ",
2403 decided_to_slp);
2404 dump_dec (MSG_NOTE, unrolling_factor);
2405 dump_printf (MSG_NOTE, "\n");
2406 }
2407
2408 return (decided_to_slp > 0);
2409 }
2410
2411
2412 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2413 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
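/* For example, if a[i] = b[i] + c[i] is vectorized as pure SLP but a
   non-SLP statement such as a scalar reduction sum += a[i] * d[i] also
   uses the def of a[i], that def must be marked hybrid: it is needed
   both by the SLP instance and by the loop-based vectorization of its
   non-SLP use.  */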
2414
2415 static void
2416 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2417 hash_map<slp_tree, unsigned> &visited)
2418 {
2419 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2420 imm_use_iterator imm_iter;
2421 gimple *use_stmt;
2422 stmt_vec_info use_vinfo;
2423 slp_tree child;
2424 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2425 int j;
2426
2427 /* We need to union stype over the incoming graph edges but we still
2428 want to limit recursion to stay O(N+E). */
2429 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
2430
2431 /* Propagate hybrid down the SLP tree. */
2432 if (stype == hybrid)
2433 ;
2434 else if (HYBRID_SLP_STMT (stmt_vinfo))
2435 stype = hybrid;
2436 else if (!only_edge)
2437 {
2438 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2439 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2440 /* If we get a pattern stmt here we have to use the LHS of the
2441 original stmt for immediate uses. */
2442 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2443 tree def;
2444 if (gimple_code (stmt) == GIMPLE_PHI)
2445 def = gimple_phi_result (stmt);
2446 else
2447 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2448 if (def)
2449 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2450 {
2451 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2452 if (!use_vinfo)
2453 continue;
2454 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2455 if (!STMT_SLP_TYPE (use_vinfo)
2456 && (STMT_VINFO_RELEVANT (use_vinfo)
2457 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2458 && !(gimple_code (use_stmt) == GIMPLE_PHI
2459 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2460 {
2461 if (dump_enabled_p ())
2462 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2463 "def in non-SLP stmt: %G", use_stmt);
2464 stype = hybrid;
2465 }
2466 }
2467 }
2468
2469 if (stype == hybrid
2470 && !HYBRID_SLP_STMT (stmt_vinfo))
2471 {
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2474 stmt_vinfo->stmt);
2475 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2476 }
2477
2478 if (!only_edge)
2479 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2480 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2481 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2482 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2483 }
2484
2485 static void
2486 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2487 {
2488 hash_map<slp_tree, unsigned> visited;
2489 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
2490 }
2491
2492 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2493
2494 static tree
2495 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2496 {
2497 walk_stmt_info *wi = (walk_stmt_info *)data;
2498 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2499
2500 if (wi->is_lhs)
2501 return NULL_TREE;
2502
2503 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2504 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2505 {
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2508 def_stmt_info->stmt);
2509 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2510 }
2511
2512 return NULL_TREE;
2513 }
2514
2515 static tree
2516 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2517 walk_stmt_info *wi)
2518 {
2519 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2520 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2521 /* If the stmt is in an SLP instance then this isn't a reason
2522 to mark use definitions in other SLP instances as hybrid. */
2523 if (! STMT_SLP_TYPE (use_vinfo)
2524 && (STMT_VINFO_RELEVANT (use_vinfo)
2525 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2526 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2527 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2528 ;
2529 else
2530 *handled = true;
2531 return NULL_TREE;
2532 }
2533
2534 /* Find stmts that must be both vectorized and SLPed. */
2535
2536 void
2537 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2538 {
2539 unsigned int i;
2540 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2541 slp_instance instance;
2542
2543 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2544
2545 /* First walk all pattern stmts in the loop and mark defs of their uses
2546 as hybrid, because immediate uses in pattern stmts are not recorded. */
2547 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2548 {
2549 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2550 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2551 gsi_next (&gsi))
2552 {
2553 gimple *stmt = gsi_stmt (gsi);
2554 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2555 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2556 {
2557 walk_stmt_info wi;
2558 memset (&wi, 0, sizeof (wi));
2559 wi.info = loop_vinfo;
2560 gimple_stmt_iterator gsi2
2561 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2562 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2563 vect_detect_hybrid_slp_1, &wi);
2564 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2565 vect_detect_hybrid_slp_2,
2566 vect_detect_hybrid_slp_1, &wi);
2567 }
2568 }
2569 }
2570
2571 /* Then walk the SLP instance trees marking stmts with uses in
2572 non-SLP stmts as hybrid, also propagating hybrid down the
2573 SLP tree, collecting the above info on-the-fly. */
2574 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2575 {
2576 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2577 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2578 i, pure_slp);
2579 }
2580 }
2581
2582
2583 /* Initialize a bb_vec_info struct for the statements between
2584 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2585
2586 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2587 gimple_stmt_iterator region_end_in,
2588 vec_info_shared *shared)
2589 : vec_info (vec_info::bb, init_cost (NULL), shared),
2590 bb (gsi_bb (region_begin_in)),
2591 region_begin (region_begin_in),
2592 region_end (region_end_in)
2593 {
2594 gimple_stmt_iterator gsi;
2595
2596 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2597 gsi_next (&gsi))
2598 {
2599 gimple *stmt = gsi_stmt (gsi);
2600 gimple_set_uid (stmt, 0);
2601 add_stmt (stmt);
2602 }
2603
2604 bb->aux = this;
2605 }
2606
2607
2608 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2609 stmts in the basic block. */
2610
2611 _bb_vec_info::~_bb_vec_info ()
2612 {
2613 for (gimple_stmt_iterator si = region_begin;
2614 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2615 /* Reset region marker. */
2616 gimple_set_uid (gsi_stmt (si), -1);
2617
2618 bb->aux = NULL;
2619 }
2620
2621 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2622 given that child nodes have already been processed, and that
2623 their def types currently match their SLP node's def type. */
2624
2625 static bool
2626 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2627 slp_instance node_instance,
2628 stmt_vector_for_cost *cost_vec)
2629 {
2630 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2631 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2632
2633 /* For BB vectorization vector types are assigned here.
2634 Memory accesses already got their vector type assigned
2635 in vect_analyze_data_refs. */
2636 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2637 if (bb_vinfo
2638 && ! STMT_VINFO_DATA_REF (stmt_info))
2639 {
2640 tree vectype, nunits_vectype;
2641 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2642 &nunits_vectype))
2643 /* We checked this when building the node. */
2644 gcc_unreachable ();
2645 if (vectype == boolean_type_node)
2646 {
2647 vectype = vect_get_mask_type_for_stmt (stmt_info);
2648 if (!vectype)
2649 /* vect_get_mask_type_for_stmt has already explained the
2650 failure. */
2651 return false;
2652 }
2653
2654 stmt_vec_info sstmt_info;
2655 unsigned int i;
2656 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2657 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2658 }
2659
2660 /* Calculate the number of vector statements to be created for the
2661 scalar stmts in this node. For SLP reductions it is equal to the
2662 number of vector statements in the children (which has already been
2663 calculated by the recursive call). Otherwise it is the number of
2664 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2665 VF divided by the number of elements in a vector. */
2666 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2667 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2668 {
2669 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2670 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2671 {
2672 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2673 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2674 break;
2675 }
2676 }
2677 else
2678 {
2679 poly_uint64 vf;
2680 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2681 vf = loop_vinfo->vectorization_factor;
2682 else
2683 vf = 1;
2684 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2686 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2687 = vect_get_num_vectors (vf * group_size, vectype);
2688 }
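  /* For example, with a vectorization factor of 4, a group size of 2 and
     a 4-element vector type such as V4SI, the node needs
     vect_get_num_vectors (4 * 2, V4SI) == 2 vector statements.  */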
2689
2690 bool dummy;
2691 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2692 }
2693
2694 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2695 the subtree.  NODE_INSTANCE is the SLP instance containing NODE and
2696 VINFO is the vec_info containing NODE_INSTANCE.
2697 Return true if the operations are supported. */
2698
2699 static bool
2700 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2701 slp_instance node_instance,
2702 scalar_stmts_to_slp_tree_map_t *visited,
2703 scalar_stmts_to_slp_tree_map_t *lvisited,
2704 stmt_vector_for_cost *cost_vec)
2705 {
2706 int i, j;
2707 slp_tree child;
2708
2709 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2710 return true;
2711
2712 /* If we already analyzed the exact same set of scalar stmts we're done.
2713 We share the generated vector stmts for those. */
2714 slp_tree *leader;
2715 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2716 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2717 {
2718 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2719 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2720 return true;
2721 }
2722
2723 /* The SLP graph is acyclic, so not caching whether we failed or
2724 succeeded causes no issue: we simply throw away the lvisited set
2725 when we fail. */
2726 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2727
2728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2729 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2730 visited, lvisited, cost_vec))
2731 return false;
2732
2733 /* ??? We have to catch the case late where the same first scalar stmt
2734 appears in multiple SLP children with different def types and fail.
2735 Remember the original def types first since SLP_TREE_DEF_TYPE doesn't
2736 necessarily match it when that is vect_internal_def. */
2737 auto_vec<vect_def_type, 4> dt;
2738 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2739 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2740 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2741 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2742
2743 /* Push SLP node def-type to stmt operands. */
2744 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2745 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2746 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2747 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2748 = SLP_TREE_DEF_TYPE (child);
2749
2750 /* Check everything worked out. */
2751 bool res = true;
2752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2753 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2754 {
2755 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2756 {
2757 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2758 != SLP_TREE_DEF_TYPE (child))
2759 res = false;
2760 }
2761 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2762 != dt[j])
2763 res = false;
2764 }
2765 if (!res && dump_enabled_p ())
2766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2767 "not vectorized: same operand with different "
2768 "def type in stmt.\n");
2769
2770 if (res)
2771 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2772 cost_vec);
2773
2774 /* Restore def-types. */
2775 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2776 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2777 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2778
2779 return res;
2780 }
2781
2782
2783 /* Analyze statements in SLP instances of VINFO. Return true if the
2784 operations are supported. */
2785
2786 bool
2787 vect_slp_analyze_operations (vec_info *vinfo)
2788 {
2789 slp_instance instance;
2790 int i;
2791
2792 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2793
2794 scalar_stmts_to_slp_tree_map_t *visited
2795 = new scalar_stmts_to_slp_tree_map_t ();
2796 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2797 {
2798 scalar_stmts_to_slp_tree_map_t lvisited;
2799 stmt_vector_for_cost cost_vec;
2800 cost_vec.create (2);
2801 if (!vect_slp_analyze_node_operations (vinfo,
2802 SLP_INSTANCE_TREE (instance),
2803 instance, visited, &lvisited,
2804 &cost_vec))
2805 {
2806 slp_tree node = SLP_INSTANCE_TREE (instance);
2807 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2808 if (dump_enabled_p ())
2809 dump_printf_loc (MSG_NOTE, vect_location,
2810 "removing SLP instance operations starting from: %G",
2811 stmt_info->stmt);
2812 vect_free_slp_instance (instance, false);
2813 vinfo->slp_instances.ordered_remove (i);
2814 cost_vec.release ();
2815 }
2816 else
2817 {
2818 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2819 x != lvisited.end(); ++x)
2820 visited->put ((*x).first.copy (), (*x).second);
2821 i++;
2822
2823 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2824 cost_vec.release ();
2825 }
2826 }
2827 delete visited;
2828
2829 return !vinfo->slp_instances.is_empty ();
2830 }
2831
2832
2833 /* Record in COST_VEC the scalar cost of the SLP node NODE and its
2834 children. Do not account defs that are marked in LIFE, and
2835 update LIFE according to uses of NODE. */
2836
2837 static void
2838 vect_bb_slp_scalar_cost (basic_block bb,
2839 slp_tree node, vec<bool, va_heap> *life,
2840 stmt_vector_for_cost *cost_vec,
2841 hash_set<slp_tree> &visited)
2842 {
2843 unsigned i;
2844 stmt_vec_info stmt_info;
2845 slp_tree child;
2846
2847 if (visited.add (node))
2848 return;
2849
2850 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2851 {
2852 gimple *stmt = stmt_info->stmt;
2853 vec_info *vinfo = stmt_info->vinfo;
2854 ssa_op_iter op_iter;
2855 def_operand_p def_p;
2856
2857 if ((*life)[i])
2858 continue;
2859
2860 /* If there is a non-vectorized use of the defs then the scalar
2861 stmt is kept live in which case we do not account it or any
2862 required defs in the SLP children in the scalar cost. This
2863 way we make the vectorization more costly when compared to
2864 the scalar cost. */
2865 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2866 {
2867 imm_use_iterator use_iter;
2868 gimple *use_stmt;
2869 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2870 if (!is_gimple_debug (use_stmt))
2871 {
2872 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2873 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2874 {
2875 (*life)[i] = true;
2876 BREAK_FROM_IMM_USE_STMT (use_iter);
2877 }
2878 }
2879 }
2880 if ((*life)[i])
2881 continue;
2882
2883 /* Count scalar stmts only once. */
2884 if (gimple_visited_p (stmt))
2885 continue;
2886 gimple_set_visited (stmt, true);
2887
2888 vect_cost_for_stmt kind;
2889 if (STMT_VINFO_DATA_REF (stmt_info))
2890 {
2891 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2892 kind = scalar_load;
2893 else
2894 kind = scalar_store;
2895 }
2896 else if (vect_nop_conversion_p (stmt_info))
2897 continue;
2898 else
2899 kind = scalar_stmt;
2900 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2901 }
2902
2903 auto_vec<bool, 20> subtree_life;
2904 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2905 {
2906 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2907 {
2908 /* Do not directly pass LIFE to the recursive call, copy it to
2909 confine changes in the callee to the current child/subtree. */
2910 subtree_life.safe_splice (*life);
2911 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2912 visited);
2913 subtree_life.truncate (0);
2914 }
2915 }
2916 }
2917
2918 static void
2919 vect_bb_slp_scalar_cost (basic_block bb,
2920 slp_tree node, vec<bool, va_heap> *life,
2921 stmt_vector_for_cost *cost_vec)
2922 {
2923 hash_set<slp_tree> visited;
2924 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2925 }
2926
2927 /* Check if vectorization of the basic block is profitable. */
2928
2929 static bool
2930 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2931 {
2932 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2933 slp_instance instance;
2934 int i;
2935 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2936 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2937
2938 /* Calculate scalar cost. */
2939 stmt_vector_for_cost scalar_costs;
2940 scalar_costs.create (0);
2941 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2942 {
2943 auto_vec<bool, 20> life;
2944 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2945 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2946 SLP_INSTANCE_TREE (instance),
2947 &life, &scalar_costs);
2948 }
2949 void *target_cost_data = init_cost (NULL);
2950 add_stmt_costs (target_cost_data, &scalar_costs);
2951 scalar_costs.release ();
2952 unsigned dummy;
2953 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2954 destroy_cost_data (target_cost_data);
2955
2956 /* Unset visited flag. */
2957 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2958 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2959 gimple_set_visited (gsi_stmt (gsi), false);
2960
2961 /* Complete the target-specific cost calculation. */
2962 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2963 &vec_inside_cost, &vec_epilogue_cost);
2964
2965 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2966
2967 if (dump_enabled_p ())
2968 {
2969 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2970 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2971 vec_inside_cost);
2972 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2973 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2974 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2975 }
2976
2977 /* Vectorization is profitable if its cost is no more than the cost of
2978 the scalar version. Note that we err on the vector side for equal cost
2979 because the cost estimate is otherwise quite pessimistic (constant uses
2980 are free on the scalar side but cost a load on the vector side, for
2981 example). */
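  /* For example, with a vector prologue cost of 2, an inside cost of 6
     and an epilogue cost of 1 the vector total is 9: a scalar cost of 9
     keeps the vectorization while a scalar cost of 8 rejects it.  */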
2982 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2983 return false;
2984
2985 return true;
2986 }
2987
2988 /* Find any vectorizable constructors and add them to the grouped_stores
2989 array. */
2990
2991 static void
2992 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
2993 {
2994 gimple_stmt_iterator gsi;
2995
2996 for (gsi = bb_vinfo->region_begin;
2997 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2998 {
2999 gimple *stmt = gsi_stmt (gsi);
3000
3001 if (is_gimple_assign (stmt)
3002 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
3003 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
3004 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
3005 {
3006 tree rhs = gimple_assign_rhs1 (stmt);
3007
3008 if (CONSTRUCTOR_NELTS (rhs) == 0)
3009 continue;
3010
3011 poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
3012
3013 if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
3014 continue;
3015
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_NOTE, vect_location,
3018 "Found vectorizable constructor: %G\n", stmt);
3019 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3020 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3021 }
3022 }
3023 }
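/* For example, a statement such as

     v_4 = {a_1, b_2, c_3, d_4};

   where the LHS has a 4-element vector type is recorded as a
   grouped-store seed for SLP analysis; constructors with no elements, or
   with fewer elements than the vector type has lanes, are skipped.  */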
3024
3025 /* Check if the region described by BB_VINFO can be vectorized, returning
3026 true if so. When returning false, set FATAL to true if the same failure
3027 would prevent vectorization at other vector sizes, false if it is still
3028 worth trying other sizes. N_STMTS is the number of statements in the
3029 region. */
3030
3031 static bool
3032 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3033 {
3034 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3035
3036 slp_instance instance;
3037 int i;
3038 poly_uint64 min_vf = 2;
3039
3040 /* The first group of checks is independent of the vector size. */
3041 fatal = true;
3042
3043 /* Analyze the data references. */
3044
3045 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3046 {
3047 if (dump_enabled_p ())
3048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3049 "not vectorized: unhandled data-ref in basic "
3050 "block.\n");
3051 return false;
3052 }
3053
3054 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3055 {
3056 if (dump_enabled_p ())
3057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3058 "not vectorized: not enough data-refs in "
3059 "basic block.\n");
3060 return false;
3061 }
3062
3063 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3064 {
3065 if (dump_enabled_p ())
3066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3067 "not vectorized: unhandled data access in "
3068 "basic block.\n");
3069 return false;
3070 }
3071
3072 vect_slp_check_for_constructors (bb_vinfo);
3073
3074 /* If there are no grouped stores in the region there is no need
3075 to continue with pattern recog as vect_analyze_slp will fail
3076 anyway. */
3077 if (bb_vinfo->grouped_stores.is_empty ())
3078 {
3079 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3081 "not vectorized: no grouped stores in "
3082 "basic block.\n");
3083 return false;
3084 }
3085
3086 /* The rest of the analysis below depends on the vector size in some way, so failures from here on are not fatal. */
3087 fatal = false;
3088
3089 vect_pattern_recog (bb_vinfo);
3090
3091 /* Check the SLP opportunities in the basic block, analyze and build SLP
3092 trees. */
3093 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3094 {
3095 if (dump_enabled_p ())
3096 {
3097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3098 "Failed to SLP the basic block.\n");
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3100 "not vectorized: failed to find SLP opportunities "
3101 "in basic block.\n");
3102 }
3103 return false;
3104 }
3105
3106 vect_record_base_alignments (bb_vinfo);
3107
3108 /* Analyze and verify the alignment of data references and the
3109 dependence in the SLP instances. */
3110 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3111 {
3112 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3113 || ! vect_slp_analyze_instance_dependence (instance))
3114 {
3115 slp_tree node = SLP_INSTANCE_TREE (instance);
3116 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3117 if (dump_enabled_p ())
3118 dump_printf_loc (MSG_NOTE, vect_location,
3119 "removing SLP instance operations starting from: %G",
3120 stmt_info->stmt);
3121 vect_free_slp_instance (instance, false);
3122 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3123 continue;
3124 }
3125
3126 /* Mark all the statements that we want to vectorize as pure SLP and
3127 relevant. */
3128 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3129 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3130 if (SLP_INSTANCE_ROOT_STMT (instance))
3131 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3132
3133 i++;
3134 }
3135 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3136 return false;
3137
3138 if (!vect_slp_analyze_operations (bb_vinfo))
3139 {
3140 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142 "not vectorized: bad operation in basic block.\n");
3143 return false;
3144 }
3145
3146 /* Cost model: check if the vectorization is worthwhile. */
3147 if (!unlimited_cost_model (NULL)
3148 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3149 {
3150 if (dump_enabled_p ())
3151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3152 "not vectorized: vectorization is not "
3153 "profitable.\n");
3154 return false;
3155 }
3156
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_NOTE, vect_location,
3159 "Basic block will be vectorized using SLP\n");
3160 return true;
3161 }
3162
3163 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3164 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3165 on success. The region has N_STMTS statements and has the datarefs
3166 given by DATAREFS. */
3167
3168 static bool
3169 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3170 gimple_stmt_iterator region_end,
3171 vec<data_reference_p> datarefs,
3172 unsigned int n_stmts)
3173 {
3174 bb_vec_info bb_vinfo;
3175 auto_vector_modes vector_modes;
3176
3177 /* Autodetect first vector size we try. */
3178 machine_mode next_vector_mode = VOIDmode;
3179 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3180 unsigned int mode_i = 0;
3181
3182 vec_info_shared shared;
3183
3184 poly_uint64 autodetected_vector_size = 0;
3185 while (1)
3186 {
3187 bool vectorized = false;
3188 bool fatal = false;
3189 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3190
3191 bool first_time_p = shared.datarefs.is_empty ();
3192 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3193 if (first_time_p)
3194 bb_vinfo->shared->save_datarefs ();
3195 else
3196 bb_vinfo->shared->check_datarefs ();
3197 bb_vinfo->vector_size = GET_MODE_SIZE (next_vector_mode);
3198
3199 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3200 && dbg_cnt (vect_slp))
3201 {
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3204
3205 bb_vinfo->shared->check_datarefs ();
3206 vect_schedule_slp (bb_vinfo);
3207
3208 unsigned HOST_WIDE_INT bytes;
3209 if (dump_enabled_p ())
3210 {
3211 if (bb_vinfo->vector_size.is_constant (&bytes))
3212 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3213 "basic block part vectorized using %wu byte "
3214 "vectors\n", bytes);
3215 else
3216 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3217 "basic block part vectorized using variable "
3218 "length vectors\n");
3219 }
3220
3221 vectorized = true;
3222 }
3223
3224 if (mode_i == 0)
3225 autodetected_vector_size = bb_vinfo->vector_size;
3226
3227 delete bb_vinfo;
3228
3229 if (mode_i < vector_modes.length ()
3230 && known_eq (GET_MODE_SIZE (vector_modes[mode_i]),
3231 autodetected_vector_size))
3232 mode_i += 1;
3233
3234 if (vectorized
3235 || mode_i == vector_modes.length ()
3236 || known_eq (autodetected_vector_size, 0U)
3237 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3238 vector sizes will fail do not bother iterating. */
3239 || fatal)
3240 return vectorized;
3241
3242 /* Try the next biggest vector size. */
3243 next_vector_mode = vector_modes[mode_i++];
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE, vect_location,
3246 "***** Re-trying analysis with vector mode %s\n",
3247 GET_MODE_NAME (next_vector_mode));
3248 }
3249 }
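/* For example, if the target's autovectorize_vector_modes hook returns
   modes of 16, 8 and 4 bytes, the loop above first analyzes the region
   with the autodetected (preferred) size, then re-tries the analysis
   with each remaining distinct size until one succeeds, the modes are
   exhausted, or a fatal failure is reported.  */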
3250
3251 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3252 true if anything in the basic-block was vectorized. */
3253
3254 bool
3255 vect_slp_bb (basic_block bb)
3256 {
3257 gimple_stmt_iterator gsi;
3258 bool any_vectorized = false;
3259
3260 gsi = gsi_start_bb (bb);
3261 while (!gsi_end_p (gsi))
3262 {
3263 gimple_stmt_iterator region_begin = gsi;
3264 vec<data_reference_p> datarefs = vNULL;
3265 int insns = 0;
3266
3267 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3268 {
3269 gimple *stmt = gsi_stmt (gsi);
3270 if (is_gimple_debug (stmt))
3271 continue;
3272 insns++;
3273
3274 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3275 vect_location = stmt;
3276
3277 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3278 break;
3279 }
3280
3281 /* Skip leading unhandled stmts. */
3282 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3283 {
3284 gsi_next (&gsi);
3285 continue;
3286 }
3287
3288 gimple_stmt_iterator region_end = gsi;
3289
3290 if (insns > param_slp_max_insns_in_bb)
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "not vectorized: too many instructions in "
3295 "basic block.\n");
3296 }
3297 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3298 any_vectorized = true;
3299
3300 if (gsi_end_p (region_end))
3301 break;
3302
3303 /* Skip the unhandled stmt. */
3304 gsi_next (&gsi);
3305 }
3306
3307 return any_vectorized;
3308 }
3309
3310
3311 /* Return true if a constant operand of STMT_VINFO needs a boolean vector type. */
3312
3313 static bool
3314 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
3315 {
3316 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3317 tree op, vectype;
3318 enum vect_def_type dt;
3319
3320 /* For comparisons and COND_EXPR the type is chosen depending
3321 on the other, non-constant comparison operand. */
3322 if (TREE_CODE_CLASS (code) == tcc_comparison)
3323 {
3324 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3325 op = gimple_assign_rhs1 (stmt);
3326
3327 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3328 gcc_unreachable ();
3329
3330 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3331 }
3332
3333 if (code == COND_EXPR)
3334 {
3335 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3336 tree cond = gimple_assign_rhs1 (stmt);
3337
3338 if (TREE_CODE (cond) == SSA_NAME)
3339 op = cond;
3340 else
3341 op = TREE_OPERAND (cond, 0);
3342
3343 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3344 gcc_unreachable ();
3345
3346 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3347 }
3348
3349 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3350 }
3351
3352 /* Build a variable-length vector in which the elements in ELTS are repeated
3353 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3354 RESULTS and add any new instructions to SEQ.
3355
3356 The approach we use is:
3357
3358 (1) Find a vector mode VM with integer elements of mode IM.
3359
3360 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3361 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3362 from small vectors to IM.
3363
3364 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3365
3366 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3367 correct byte contents.
3368
3369 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3370
3371 We try to find the largest IM for which this sequence works, in order
3372 to cut down on the number of interleaves. */
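
/* As an illustrative walk-through (assuming a target where the mode
   exists): to repeat two SIs ELTS = {a, b} across a variable-length
   vector, step (1) might pick VM = VNx2DI with IM = DI; step (2)
   view-converts the pair {a, b} to a single DI; step (3) duplicates
   that DI across a VNx2DI vector; step (4) needs no VEC_PERM_EXPRs
   since NELTS' == 1; and step (5) view-converts the result to the
   required vector type.  */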
3373
3374 void
3375 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3376 vec<tree> elts, unsigned int nresults,
3377 vec<tree> &results)
3378 {
3379 unsigned int nelts = elts.length ();
3380 tree element_type = TREE_TYPE (vector_type);
3381
3382 /* (1) Find a vector mode VM with integer elements of mode IM. */
3383 unsigned int nvectors = 1;
3384 tree new_vector_type;
3385 tree permutes[2];
3386 if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
3387 &nvectors, &new_vector_type,
3388 permutes))
3389 gcc_unreachable ();
3390
3391 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3392 unsigned int partial_nelts = nelts / nvectors;
3393 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3394
3395 tree_vector_builder partial_elts;
3396 auto_vec<tree, 32> pieces (nvectors * 2);
3397 pieces.quick_grow (nvectors * 2);
3398 for (unsigned int i = 0; i < nvectors; ++i)
3399 {
3400 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3401 ELTS' has mode IM. */
3402 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3403 for (unsigned int j = 0; j < partial_nelts; ++j)
3404 partial_elts.quick_push (elts[i * partial_nelts + j]);
3405 tree t = gimple_build_vector (seq, &partial_elts);
3406 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3407 TREE_TYPE (new_vector_type), t);
3408
3409 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3410 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3411 }
3412
3413 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3414 correct byte contents.
3415
3416 We need to repeat the following operation log2(nvectors) times:
3417
3418 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3419 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3420
3421 However, if each input repeats every N elements and the VF is
3422 a multiple of N * 2, the HI result is the same as the LO. */
3423 unsigned int in_start = 0;
3424 unsigned int out_start = nvectors;
3425 unsigned int hi_start = nvectors / 2;
3426 /* A bound on the number of outputs needed to produce NRESULTS results
3427 in the final iteration. */
3428 unsigned int noutputs_bound = nvectors * nresults;
3429 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3430 {
3431 noutputs_bound /= 2;
3432 unsigned int limit = MIN (noutputs_bound, nvectors);
3433 for (unsigned int i = 0; i < limit; ++i)
3434 {
3435 if ((i & 1) != 0
3436 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3437 2 * in_repeat))
3438 {
3439 pieces[out_start + i] = pieces[out_start + i - 1];
3440 continue;
3441 }
3442
3443 tree output = make_ssa_name (new_vector_type);
3444 tree input1 = pieces[in_start + (i / 2)];
3445 tree input2 = pieces[in_start + (i / 2) + hi_start];
3446 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3447 input1, input2,
3448 permutes[i & 1]);
3449 gimple_seq_add_stmt (seq, stmt);
3450 pieces[out_start + i] = output;
3451 }
3452 std::swap (in_start, out_start);
3453 }
3454
3455 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3456 results.reserve (nresults);
3457 for (unsigned int i = 0; i < nresults; ++i)
3458 if (i < nvectors)
3459 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3460 pieces[in_start + i]));
3461 else
3462 results.quick_push (results[i - nvectors]);
3463 }
3464
3465
3466 /* For constant and loop invariant defs of SLP_NODE this function returns
3467 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3468 OP_NODE determines the node for the operand containing the scalar
3469 operands. */
3470
3471 static void
3472 vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
3473 vec<tree> *vec_oprnds)
3474 {
3475 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3476 vec_info *vinfo = stmt_vinfo->vinfo;
3477 unsigned HOST_WIDE_INT nunits;
3478 tree vec_cst;
3479 unsigned j, number_of_places_left_in_vector;
3480 tree vector_type;
3481 tree vop;
3482 int group_size = op_node->ops.length ();
3483 unsigned int vec_num, i;
3484 unsigned number_of_copies = 1;
3485 bool constant_p;
3486 tree neutral_op = NULL;
3487 gimple_seq ctor_seq = NULL;
3488 auto_vec<tree, 16> permute_results;
3489
3490 /* ??? SLP analysis should compute the vector type for the
3491 constant / invariant and store it in the SLP node. */
3492 tree op = op_node->ops[0];
3493 /* Check if vector type is a boolean vector. */
3494 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3495 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3496 && vect_mask_constant_operand_p (stmt_vinfo))
3497 vector_type = truth_type_for (stmt_vectype);
3498 else
3499 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
3500
3501 /* ??? For lane-reducing ops we should also have the required number
3502 of vector stmts initialized rather than second-guessing here. */
3503 unsigned int number_of_vectors;
3504 if (is_gimple_assign (stmt_vinfo->stmt)
3505 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3506 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3507 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3508 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3509 else
3510 number_of_vectors
3511 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3512 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3513 vector_type);
3514 vec_oprnds->create (number_of_vectors);
3515 auto_vec<tree> voprnds (number_of_vectors);
3516
3517 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3518 the created vectors.  It is greater than 1 if unrolling is performed.
3519
3520 For example, we have two scalar operands, s1 and s2 (e.g., group of
3521 strided accesses of size two), while NUNITS is four (i.e., four scalars
3522 of this type can be packed in a vector). The output vector will contain
3523 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3524 will be 2).
3525
3526 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3527 containing the operands.
3528
3529 For example, NUNITS is four as before, and the group size is 8
3530 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3531 {s5, s6, s7, s8}. */
3532
3533 /* When using duplicate_and_interleave, we just need one element for
3534 each scalar statement. */
3535 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3536 nunits = group_size;
3537
3538 number_of_copies = nunits * number_of_vectors / group_size;
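/* E.g. with the example above, NUNITS == 4, NUMBER_OF_VECTORS == 1 and
   GROUP_SIZE == 2 give NUMBER_OF_COPIES == 4 * 1 / 2 == 2, matching the
   two copies in {s1, s2, s1, s2}.  */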
3539
3540 number_of_places_left_in_vector = nunits;
3541 constant_p = true;
3542 tree_vector_builder elts (vector_type, nunits, 1);
3543 elts.quick_grow (nunits);
3544 bool place_after_defs = false;
3545 for (j = 0; j < number_of_copies; j++)
3546 {
3547 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3548 {
3549 /* Create 'vect_ = {op0,op1,...,opn}'. */
3550 number_of_places_left_in_vector--;
3551 tree orig_op = op;
3552 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3553 {
3554 if (CONSTANT_CLASS_P (op))
3555 {
3556 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3557 {
3558 /* Can't use VIEW_CONVERT_EXPR for booleans because
3559 of possibly different sizes of scalar value and
3560 vector element. */
3561 if (integer_zerop (op))
3562 op = build_int_cst (TREE_TYPE (vector_type), 0);
3563 else if (integer_onep (op))
3564 op = build_all_ones_cst (TREE_TYPE (vector_type));
3565 else
3566 gcc_unreachable ();
3567 }
3568 else
3569 op = fold_unary (VIEW_CONVERT_EXPR,
3570 TREE_TYPE (vector_type), op);
3571 gcc_assert (op && CONSTANT_CLASS_P (op));
3572 }
3573 else
3574 {
3575 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3576 gimple *init_stmt;
3577 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3578 {
3579 tree true_val
3580 = build_all_ones_cst (TREE_TYPE (vector_type));
3581 tree false_val
3582 = build_zero_cst (TREE_TYPE (vector_type));
3583 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3584 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3585 op, true_val,
3586 false_val);
3587 }
3588 else
3589 {
3590 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3591 op);
3592 init_stmt
3593 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3594 op);
3595 }
3596 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3597 op = new_temp;
3598 }
3599 }
3600 elts[number_of_places_left_in_vector] = op;
3601 if (!CONSTANT_CLASS_P (op))
3602 constant_p = false;
3603 if (TREE_CODE (orig_op) == SSA_NAME
3604 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3605 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3606 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3607 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3608 place_after_defs = true;
3609
3610 if (number_of_places_left_in_vector == 0)
3611 {
3612 if (constant_p
3613 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3614 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3615 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3616 else
3617 {
3618 if (permute_results.is_empty ())
3619 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3620 elts, number_of_vectors,
3621 permute_results);
3622 vec_cst = permute_results[number_of_vectors - j - 1];
3623 }
3624 tree init;
3625 gimple_stmt_iterator gsi;
3626 if (place_after_defs)
3627 {
3628 stmt_vec_info last_stmt_info
3629 = vect_find_last_scalar_stmt_in_slp (slp_node);
3630 gsi = gsi_for_stmt (last_stmt_info->stmt);
3631 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3632 &gsi);
3633 }
3634 else
3635 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3636 NULL);
3637 if (ctor_seq != NULL)
3638 {
3639 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3640 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3641 ctor_seq = NULL;
3642 }
3643 voprnds.quick_push (init);
3644 place_after_defs = false;
3645 number_of_places_left_in_vector = nunits;
3646 constant_p = true;
3647 elts.new_vector (vector_type, nunits, 1);
3648 elts.quick_grow (nunits);
3649 }
3650 }
3651 }
3652
3653 /* The vectors were created in reverse order, so push them onto
3654 VEC_OPRNDS in reverse to restore the original order. */
3655 vec_num = voprnds.length ();
3656 for (j = vec_num; j != 0; j--)
3657 {
3658 vop = voprnds[j - 1];
3659 vec_oprnds->quick_push (vop);
3660 }
3661
3662 /* If the VF is greater than the unrolling factor needed for the SLP
3663 group of stmts, the NUMBER_OF_VECTORS to be created exceeds
3664 NUMBER_OF_SCALARS/NUNITS (or NUNITS/NUMBER_OF_SCALARS), and hence we
3665 have to replicate the vectors. */
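/* E.g. (hypothetical): if four vectors are needed but only two were
   created and there is no neutral op, the loop below pushes copies of
   the existing ones to give {v0, v1, v0, v1}.  */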
3666 tree neutral_vec = NULL;
3667 while (number_of_vectors > vec_oprnds->length ())
3668 {
3669 if (neutral_op)
3670 {
3671 /* Build the neutral vector once and reuse it for each copy.  */
3672 if (!neutral_vec)
3673 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3674
3675 vec_oprnds->quick_push (neutral_vec);
3676 }
3677 else
3678 {
3679 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3680 vec_oprnds->quick_push (vop);
3681 }
3682 }
3683 }
3684
3685
3686 /* Get the vectorized definitions from SLP_NODE, which contains the
3687 corresponding vectorized def-stmts. */
3688
3689 static void
3690 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3691 {
3692 stmt_vec_info vec_def_stmt_info;
3693 unsigned int i;
3694
3695 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3696
3697 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3698 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3699 }
3700
3701
3702 /* Get N vectorized definitions for SLP_NODE.
3703 If the scalar definitions are loop invariants or constants, collect them and
3704 call vect_get_constant_vectors() to create vector stmts.
3705 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3706 must be stored in the corresponding child of SLP_NODE, and we call
3707 vect_get_slp_vect_defs () to retrieve them. */
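/* A minimal usage sketch (hypothetical caller; the real callers live
   elsewhere in the vectorizer):

     vec<vec<tree> > vec_defs = vNULL;
     vec_defs.create (2);
     vect_get_slp_defs (slp_node, &vec_defs, 2);
     vec<tree> op0_defs = vec_defs[0];
     vec<tree> op1_defs = vec_defs[1];

   Each inner vector holds one vectorized def per vector stmt of the
   corresponding child of SLP_NODE.  */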
3708
3709 void
3710 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3711 {
3712 if (n == -1U)
3713 n = SLP_TREE_CHILDREN (slp_node).length ();
3714
3715 for (unsigned i = 0; i < n; ++i)
3716 {
3717 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3718
3719 vec<tree> vec_defs = vNULL;
3720
3721 /* For each operand we check if it has vectorized definitions in a child
3722 node or we need to create them (for invariants and constants). */
3723 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3724 {
3725 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3726 vect_get_slp_vect_defs (child, &vec_defs);
3727 }
3728 else
3729 vect_get_constant_vectors (child, slp_node, &vec_defs);
3730
3731 vec_oprnds->quick_push (vec_defs);
3732 }
3733 }
3734
3735 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3736 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3737 permute statements for the SLP node NODE of the SLP instance
3738 SLP_NODE_INSTANCE. */
3739
3740 bool
3741 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3742 gimple_stmt_iterator *gsi, poly_uint64 vf,
3743 slp_instance slp_node_instance, bool analyze_only,
3744 unsigned *n_perms)
3745 {
3746 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3747 vec_info *vinfo = stmt_info->vinfo;
3748 int vec_index = 0;
3749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3750 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3751 unsigned int mask_element;
3752 machine_mode mode;
3753
3754 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3755 return false;
3756
3757 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3758
3759 mode = TYPE_MODE (vectype);
3760 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3761
3762 /* Initialize the vect stmts of NODE so that the generated stmts can be
3763 inserted in the proper places later. */
3764 if (! analyze_only)
3765 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3766 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3767 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3768
3769 /* Generate permutation masks for every NODE. The number of masks for
3770 each NODE is equal to GROUP_SIZE.
3771 E.g., we have a group of three nodes with three loads from the same
3772 location in each node, and the vector size is 4. I.e., we have an
3773 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3774 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3775 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3776 ...
3777
3778 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3779 The last mask is illegal since a permute operation takes only two
3780 operands, and the mask element values cannot index outside that range.
3781 Hence, the last mask must be converted into {2,5,5,5}.
3782 For the first two permutations we need the first and the second input
3783 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3784 we need the second and the third vectors: {b1,c1,a2,b2} and
3785 {c2,a3,b3,c3}. */
3786
3787 int vect_stmts_counter = 0;
3788 unsigned int index = 0;
3789 int first_vec_index = -1;
3790 int second_vec_index = -1;
3791 bool noop_p = true;
3792 *n_perms = 0;
3793
3794 vec_perm_builder mask;
3795 unsigned int nelts_to_build;
3796 unsigned int nvectors_per_build;
3797 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3798 && multiple_p (nunits, group_size));
3799 if (repeating_p)
3800 {
3801 /* A single vector contains a whole number of copies of the node, so:
3802 (a) all permutes can use the same mask; and
3803 (b) the permutes only need a single vector input. */
3804 mask.new_vector (nunits, group_size, 3);
3805 nelts_to_build = mask.encoded_nelts ();
3806 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3807 }
3808 else
3809 {
3810 /* We need to construct a separate mask for each vector statement. */
3811 unsigned HOST_WIDE_INT const_nunits, const_vf;
3812 if (!nunits.is_constant (&const_nunits)
3813 || !vf.is_constant (&const_vf))
3814 return false;
3815 mask.new_vector (const_nunits, const_nunits, 1);
3816 nelts_to_build = const_vf * group_size;
3817 nvectors_per_build = 1;
3818 }
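/* An illustrative sketch (hypothetical values): in the REPEATING_P case
   with GROUP_SIZE == DR_GROUP_SIZE == 2 and a load permutation {1, 0},
   the loop below fills in two patterns of three elements each,
   {1, 0, 3, 2, 5, 4}, which the encoding extends to
   {1, 0, 3, 2, 5, 4, 7, 6, ...} for however many lanes the vector
   has.  */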
3819
3820 unsigned int count = mask.encoded_nelts ();
3821 mask.quick_grow (count);
3822 vec_perm_indices indices;
3823
3824 for (unsigned int j = 0; j < nelts_to_build; j++)
3825 {
3826 unsigned int iter_num = j / group_size;
3827 unsigned int stmt_num = j % group_size;
3828 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3829 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3830 if (repeating_p)
3831 {
3832 first_vec_index = 0;
3833 mask_element = i;
3834 }
3835 else
3836 {
3837 /* Enforced before the loop when !repeating_p. */
3838 unsigned int const_nunits = nunits.to_constant ();
3839 vec_index = i / const_nunits;
3840 mask_element = i % const_nunits;
3841 if (vec_index == first_vec_index
3842 || first_vec_index == -1)
3843 {
3844 first_vec_index = vec_index;
3845 }
3846 else if (vec_index == second_vec_index
3847 || second_vec_index == -1)
3848 {
3849 second_vec_index = vec_index;
3850 mask_element += const_nunits;
3851 }
3852 else
3853 {
3854 if (dump_enabled_p ())
3855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3856 "permutation requires at "
3857 "least three vectors %G",
3858 stmt_info->stmt);
3859 gcc_assert (analyze_only);
3860 return false;
3861 }
3862
3863 gcc_assert (mask_element < 2 * const_nunits);
3864 }
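/* Illustrative walkthrough (hypothetical numbers): with
   CONST_NUNITS == 4, element i == 9 lives in vector 9 / 4 == 2 at
   lane 9 % 4 == 1; when that vector becomes the second input, the
   lane index is biased to 1 + 4 == 5.  This is how the {6,9,9,9}
   mask in the comment above becomes {2,5,5,5}.  */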
3865
3866 if (mask_element != index)
3867 noop_p = false;
3868 mask[index++] = mask_element;
3869
3870 if (index == count && !noop_p)
3871 {
3872 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3873 if (!can_vec_perm_const_p (mode, indices))
3874 {
3875 if (dump_enabled_p ())
3876 {
3877 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3878 vect_location,
3879 "unsupported vect permute { ");
3880 for (i = 0; i < count; ++i)
3881 {
3882 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3883 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3884 }
3885 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3886 }
3887 gcc_assert (analyze_only);
3888 return false;
3889 }
3890
3891 ++*n_perms;
3892 }
3893
3894 if (index == count)
3895 {
3896 if (!analyze_only)
3897 {
3898 tree mask_vec = NULL_TREE;
3899
3900 if (! noop_p)
3901 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3902
3903 if (second_vec_index == -1)
3904 second_vec_index = first_vec_index;
3905
3906 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3907 {
3908 /* Generate the permute statement if necessary. */
3909 tree first_vec = dr_chain[first_vec_index + ri];
3910 tree second_vec = dr_chain[second_vec_index + ri];
3911 stmt_vec_info perm_stmt_info;
3912 if (! noop_p)
3913 {
3914 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3915 tree perm_dest
3916 = vect_create_destination_var (gimple_assign_lhs (stmt),
3917 vectype);
3918 perm_dest = make_ssa_name (perm_dest);
3919 gassign *perm_stmt
3920 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3921 first_vec, second_vec,
3922 mask_vec);
3923 perm_stmt_info
3924 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3925 gsi);
3926 }
3927 else
3928 /* If mask was NULL_TREE generate the requested
3929 identity transform. */
3930 perm_stmt_info = vinfo->lookup_def (first_vec);
3931
3932 /* Store the vector statement in NODE. */
3933 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3934 = perm_stmt_info;
3935 }
3936 }
3937
3938 index = 0;
3939 first_vec_index = -1;
3940 second_vec_index = -1;
3941 noop_p = true;
3942 }
3943 }
3944
3945 return true;
3946 }
3947
3948 /* Vectorize SLP instance tree in postorder. */
3949
3950 static void
3951 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3952 scalar_stmts_to_slp_tree_map_t *bst_map)
3953 {
3954 gimple_stmt_iterator si;
3955 stmt_vec_info stmt_info;
3956 unsigned int group_size;
3957 tree vectype;
3958 int i, j;
3959 slp_tree child;
3960
3961 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3962 return;
3963
3964 /* See if we have already vectorized the node in the graph of the
3965 SLP instance. */
3966 if (SLP_TREE_VEC_STMTS (node).exists ())
3967 return;
3968
3969 /* See if we have already vectorized the same set of stmts and reuse their
3970 vectorized stmts across instances. */
3971 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3972 {
3973 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3974 return;
3975 }
3976
3977 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3978 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3979 vect_schedule_slp_instance (child, instance, bst_map);
3980
3981 /* Push SLP node def-type to stmts. */
3982 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3983 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3984 {
3985 stmt_vec_info child_stmt_info;
3986 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3987 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3988 }
3989
3990 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3991
3992 /* VECTYPE is the type of the destination. */
3993 vectype = STMT_VINFO_VECTYPE (stmt_info);
3994 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3995 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3996
3997 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3998 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3999
4000 if (dump_enabled_p ())
4001 dump_printf_loc (MSG_NOTE, vect_location,
4002 "------>vectorizing SLP node starting from: %G",
4003 stmt_info->stmt);
4004
4005 /* Vectorized stmts go before the last scalar stmt, which is where
4006 all uses are ready. */
4007 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4008 si = gsi_for_stmt (last_stmt_info->stmt);
4009
4010 /* Handle two-operation SLP nodes by vectorizing the group with
4011 both operations and then performing a merge. */
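/* An illustrative sketch (hypothetical scalars): for a group
   { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } the node is vectorized once
   with the PLUS code and once with the MINUS code, and for
   CONST_NUNITS == 4 the merge mask built below is { 0, 5, 2, 7 }:
   even lanes are taken from the PLUS result, odd lanes from the
   MINUS result.  */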
4012 if (SLP_TREE_TWO_OPERATORS (node))
4013 {
4014 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4015 enum tree_code code0 = gimple_assign_rhs_code (stmt);
4016 enum tree_code ocode = ERROR_MARK;
4017 stmt_vec_info ostmt_info;
4018 vec_perm_builder mask (group_size, group_size, 1);
4019 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4020 {
4021 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4022 if (gimple_assign_rhs_code (ostmt) != code0)
4023 {
4024 mask.quick_push (1);
4025 ocode = gimple_assign_rhs_code (ostmt);
4026 }
4027 else
4028 mask.quick_push (0);
4029 }
4030 if (ocode != ERROR_MARK)
4031 {
4032 vec<stmt_vec_info> v0;
4033 vec<stmt_vec_info> v1;
4034 unsigned j;
4035 tree tmask = NULL_TREE;
4036 vect_transform_stmt (stmt_info, &si, node, instance);
4037 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4038 SLP_TREE_VEC_STMTS (node).truncate (0);
4039 gimple_assign_set_rhs_code (stmt, ocode);
4040 vect_transform_stmt (stmt_info, &si, node, instance);
4041 gimple_assign_set_rhs_code (stmt, code0);
4042 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4043 SLP_TREE_VEC_STMTS (node).truncate (0);
4044 tree meltype = build_nonstandard_integer_type
4045 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4046 tree mvectype = get_same_sized_vectype (meltype, vectype);
4047 unsigned k = 0, l;
4048 for (j = 0; j < v0.length (); ++j)
4049 {
4050 /* Enforced by vect_build_slp_tree, which rejects variable-length
4051 vectors for SLP_TREE_TWO_OPERATORS. */
4052 unsigned int const_nunits = nunits.to_constant ();
4053 tree_vector_builder melts (mvectype, const_nunits, 1);
4054 for (l = 0; l < const_nunits; ++l)
4055 {
4056 if (k >= group_size)
4057 k = 0;
4058 tree t = build_int_cst (meltype,
4059 mask[k++] * const_nunits + l);
4060 melts.quick_push (t);
4061 }
4062 tmask = melts.build ();
4063
4064 /* ??? Not all targets support a VEC_PERM_EXPR with a
4065 constant mask that would translate to a vec_merge RTX
4066 (with their vec_perm_const_ok). We can either not
4067 vectorize in that case or let veclower do its job.
4068 Unfortunately that isn't too great and at least for
4069 plus/minus we'd eventually like to match targets'
4070 vector addsub instructions. */
4071 gimple *vstmt;
4072 vstmt = gimple_build_assign (make_ssa_name (vectype),
4073 VEC_PERM_EXPR,
4074 gimple_assign_lhs (v0[j]->stmt),
4075 gimple_assign_lhs (v1[j]->stmt),
4076 tmask);
4077 SLP_TREE_VEC_STMTS (node).quick_push
4078 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4079 }
4080 v0.release ();
4081 v1.release ();
4082 return;
4083 }
4084 }
4085 vect_transform_stmt (stmt_info, &si, node, instance);
4086
4087 /* Restore stmt def-types. */
4088 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4089 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4090 {
4091 stmt_vec_info child_stmt_info;
4092 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4093 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4094 }
4095 }
4096
4097 /* Replace scalar calls from SLP node NODE with assignments that set their lhs to zero.
4098 For loop vectorization this is done in vectorizable_call, but for SLP
4099 it needs to be deferred until end of vect_schedule_slp, because multiple
4100 SLP instances may refer to the same scalar stmt. */
4101
4102 static void
4103 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4104 {
4105 gimple *new_stmt;
4106 gimple_stmt_iterator gsi;
4107 int i;
4108 slp_tree child;
4109 tree lhs;
4110 stmt_vec_info stmt_info;
4111
4112 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4113 return;
4114
4115 if (visited.add (node))
4116 return;
4117
4118 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4119 vect_remove_slp_scalar_calls (child, visited);
4120
4121 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4122 {
4123 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4124 if (!stmt || gimple_bb (stmt) == NULL)
4125 continue;
4126 if (is_pattern_stmt_p (stmt_info)
4127 || !PURE_SLP_STMT (stmt_info))
4128 continue;
4129 lhs = gimple_call_lhs (stmt);
4130 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4131 gsi = gsi_for_stmt (stmt);
4132 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4133 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4134 }
4135 }
4136
4137 static void
4138 vect_remove_slp_scalar_calls (slp_tree node)
4139 {
4140 hash_set<slp_tree> visited;
4141 vect_remove_slp_scalar_calls (node, visited);
4142 }
4143
4144 /* Vectorize the root statement of INSTANCE using the vectorized defs of NODE. */
4145
4146 void
4147 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4148 {
4149 gassign *rstmt = NULL;
4150
4151 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4152 {
4153 stmt_vec_info child_stmt_info;
4154 int j;
4155
4156 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4157 {
4158 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4159 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4160 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4161 break;
4162 }
4163 }
4164 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4165 {
4166 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4167 stmt_vec_info child_stmt_info;
4168 int j;
4169 vec<constructor_elt, va_gc> *v;
4170 vec_alloc (v, nelts);
4171
4172 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4173 {
4174 CONSTRUCTOR_APPEND_ELT (v,
4175 NULL_TREE,
4176 gimple_get_lhs (child_stmt_info->stmt));
4177 }
4178 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4179 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4180 tree r_constructor = build_constructor (rtype, v);
4181 rstmt = gimple_build_assign (lhs, r_constructor);
4182 }
4183
4184 gcc_assert (rstmt);
4185
4186 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4187 gsi_replace (&rgsi, rstmt, true);
4188 }
4189
4190 /* Generate vector code for all SLP instances in the loop/basic block. */
4191
4192 void
4193 vect_schedule_slp (vec_info *vinfo)
4194 {
4195 vec<slp_instance> slp_instances;
4196 slp_instance instance;
4197 unsigned int i;
4198
4199 scalar_stmts_to_slp_tree_map_t *bst_map
4200 = new scalar_stmts_to_slp_tree_map_t ();
4201 slp_instances = vinfo->slp_instances;
4202 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4203 {
4204 slp_tree node = SLP_INSTANCE_TREE (instance);
4205 /* Schedule the tree of INSTANCE. */
4206 vect_schedule_slp_instance (node, instance, bst_map);
4207
4208 if (SLP_INSTANCE_ROOT_STMT (instance))
4209 vectorize_slp_instance_root_stmt (node, instance);
4210
4211 if (dump_enabled_p ())
4212 dump_printf_loc (MSG_NOTE, vect_location,
4213 "vectorizing stmts using SLP.\n");
4214 }
4215 delete bst_map;
4216
4217 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4218 {
4219 slp_tree root = SLP_INSTANCE_TREE (instance);
4220 stmt_vec_info store_info;
4221 unsigned int j;
4222
4223 /* Remove scalar call stmts. Do not do this for basic-block
4224 vectorization as not all uses may be vectorized.
4225 ??? Why should this be necessary? DCE should be able to
4226 remove the stmts itself.
4227 ??? For BB vectorization we can as well remove scalar
4228 stmts starting from the SLP tree root if they have no
4229 uses. */
4230 if (is_a <loop_vec_info> (vinfo))
4231 vect_remove_slp_scalar_calls (root);
4232
4233 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4234 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4235 {
4236 if (!STMT_VINFO_DATA_REF (store_info))
4237 break;
4238
4239 if (SLP_INSTANCE_ROOT_STMT (instance))
4240 continue;
4241
4242 store_info = vect_orig_stmt (store_info);
4243 /* Free the attached stmt_vec_info and remove the stmt. */
4244 vinfo->remove_stmt (store_info);
4245 }
4246 }
4247 }