gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

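  /* Record the new use of each scalar stmt; vect_update_shared_vectype
     below checks this count when deciding whether a stmt's vector type
     can still be (re)assigned during BB vectorization.  */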
  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
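
/* As an illustration of the above: for an interleaving chain that
   accesses a[0], a[2] and a[3], DR_GROUP_GAP is 2 on the second element
   and 1 on the third, so the places computed for the three accesses are
   0, 2 and 3 respectively.  */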

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
                                tree elt_type, unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
        {
          /* Get the natural vector type for this SLP group size.  */
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type
            = get_vectype_for_scalar_type (vinfo, int_type, count);
          if (vector_type
              && VECTOR_MODE_P (TYPE_MODE (vector_type))
              && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                           GET_MODE_SIZE (base_vector_mode)))
            {
              /* Try fusing consecutive sequences of COUNT / NVECTORS elements
                 together into elements of type INT_TYPE and using the result
                 to build NVECTORS vectors.  */
              poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
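              /* SEL1 interleaves the low halves of two input vectors,
                 i.e. { 0, nelts, 1, nelts + 1, ... }, while SEL2
                 interleaves the high halves, i.e. { half_nelts,
                 half_nelts + nelts, half_nelts + 1, ... }.  Each builder
                 encodes its permutation as two interleaved patterns of
                 three elements, so pushing three index pairs is enough
                 to describe it even for variable-length vectors.  */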
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates that a swap is required for cond_expr stmts.  Specifically,
   *SWAP is 1 if STMT is a cond and the operands of the comparison need to
   be swapped; *SWAP is 2 if STMT is a cond and the comparison code needs to
   be inverted.  If any operands were swapped in this function, *SWAP is set
   to a non-zero value.
   If there was a fatal error, return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is OK, return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);

          /* Masked load, only look at mask.  */
          if (ifn == IFN_MASK_LOAD)
            {
              number_of_oprnds = 1;
              /* Mask operand index.  */
              first_op_idx = 5;
            }
        }
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from the first stmt.  */
      if (code == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
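          /* maps[1] swaps the two operands of the embedded comparison
             (*SWAP == 1); maps[2] swaps the then and else arms for an
             inverted comparison (*SWAP == 2).  */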
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
                                             first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
        oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: can't analyze def for %T\n",
                             oprnd);

          return -1;
        }

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
        oprnd_info->any_pattern = true;

      if (first)
        {
          /* For the swapping logic below force vect_reduction_def
             for the reduction op in a SLP reduction group.  */
          if (!STMT_VINFO_DATA_REF (stmt_info)
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
              && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
              && def_stmt_info)
            dt = vect_reduction_def;
          oprnd_info->first_dt = dt;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not the first stmt of the group; check that the def-stmt/s
             match the def-stmt/s of the first stmt.  Allow different
             definition types for reduction chains: the first stmt must be
             a vect_reduction_def (a phi node), and the rest must be
             members of the reduction chain.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && !STMT_VINFO_DATA_REF (stmt_info)
                    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
                    && def_stmt_info
                    && !STMT_VINFO_DATA_REF (def_stmt_info)
                    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                        == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type)
              || (!STMT_VINFO_DATA_REF (stmt_info)
                  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
                  && ((!def_stmt_info
                       || STMT_VINFO_DATA_REF (def_stmt_info)
                       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                           != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
                      != (oprnd_info->first_dt != vect_reduction_def))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == commutative_op && !swapped)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "trying swapped operands\n");
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
                                                      type)))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: invalid type of def "
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_external_def:
          /* Make sure to demote the overall operand to external.  */
          oprnd_info->first_dt = vect_external_def;
          /* Fallthru.  */
        case vect_constant_def:
          oprnd_info->def_stmts.quick_push (NULL);
          oprnd_info->ops.quick_push (oprnd);
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (oprnd_info->first_dt == vect_reduction_def
              && !STMT_VINFO_DATA_REF (stmt_info)
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
              && !STMT_VINFO_DATA_REF (def_stmt_info)
              && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
            {
              /* For a SLP reduction chain we want to duplicate the
                 reduction to each of the chain members.  That gets
                 us a sane SLP graph (still the stmts are not 100%
                 correct wrt the initial values).  */
              gcc_assert (!first);
              oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
              oprnd_info->ops.quick_push (oprnd_info->ops[0]);
              break;
            }
          /* Fallthru.  */
        case vect_induction_def:
          oprnd_info->def_stmts.quick_push (def_stmt_info);
          oprnd_info->ops.quick_push (oprnd);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: illegal type of def %T\n",
                             oprnd);

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "swapped operands to match def types in %G",
                         stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
         used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
        if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
            || is_pattern_stmt_p (member_info))
          break;

      if (!member_info)
        {
          for (member_info = first_info; member_info;
               member_info = DR_GROUP_NEXT_ELEMENT (member_info))
            STMT_VINFO_VECTYPE (member_info) = vectype;
          return true;
        }
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
           && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: incompatible vector"
                       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
                       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
                       "  new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec_info *vinfo, vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (vinfo, stmts[0], &vectype,
                                       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
                        unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type in %G\n",
                         stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required "
                         "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
                               unsigned int group_size, tree vectype,
                               tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

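  /* Build a blend mask that selects element I from the first vector when
     stmt I % GROUP_SIZE uses the leading operation and element I + COUNT
     from the second vector when it uses ALT_STMT_CODE; e.g. for an
     alternating plus/minus group of width four this is { 0, 5, 2, 7 }.  */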
  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify that the scalar stmts STMTS are isomorphic, do not require any
   data permutation and are of supported types of operation.  Return true
   if so, otherwise return false and indicate in *MATCHES which stmts are
   not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates that the comparison could not be carried out or the stmts
   will never be vectorized by SLP.

   Note that a COND_EXPR is possibly isomorphic to another one after
   swapping its operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and
   isomorphic to the first stmt by swapping the two operands of its
   comparison; set SWAP[i] to 2 if stmt I is isomorphic to the first stmt
   by inverting the comparison code.  Take A1 >= B1 ? X1 : Y1 as an
   example: it can be swapped to B1 <= A1 ? X1 : Y1, or inverted to
   A1 < B1 ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
                                           &nunits_vectype, group_size)
          || (nunits_vectype
              && !vect_record_max_nunits (vinfo, stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
          && !vect_update_shared_vectype (stmt_info, vectype))
        continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
        {
          rhs_code = CALL_EXPR;

          if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
            load_p = true;
          else if ((gimple_call_internal_p (call_stmt)
                    && (!vectorizable_internal_fn_p
                        (gimple_call_internal_fn (call_stmt))))
                   || gimple_call_tail_p (call_stmt)
                   || gimple_call_noreturn_p (call_stmt)
                   || !gimple_call_nothrow_p (call_stmt)
                   || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        {
          rhs_code = gimple_assign_rhs_code (stmt);
          load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
        }

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
          else if (call_stmt
                   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
            {
              need_same_oprnds = true;
              first_op1 = gimple_call_arg (call_stmt, 1);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt %G", stmt);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt %G", first_stmt_info->stmt);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds)
            {
              tree other_op1 = (call_stmt
                                ? gimple_call_arg (call_stmt, 1)
                                : gimple_assign_rhs2 (stmt));
              if (!operand_equal_p (first_op1, other_op1, 0))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different shift "
                                     "arguments in %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }

          if (!load_p && rhs_code == CALL_EXPR)
            {
              if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different calls in %G",
                                     stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different "
                                         "interleaving chains in one node %G",
                                         stmt);
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (load_p)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: not grouped load %G", stmt);

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != VIEW_CONVERT_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphism can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphism can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different"
                                     " operation %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
                                          vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt %G", stmts[i]->stmt);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt %G", first_stmt_info->stmt);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_map used to record SLP builds (including failed
   ones) for a stmt set.  Note that we never remove entries except at
   destruction time, so we do not need a special deleted value that
   differs from the empty one.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
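
/* The map above caches, per group of scalar stmts, either the SLP tree
   built for the group or NULL if building it failed; vect_build_slp_tree
   uses it to memoize results and to share subtrees between uses of the
   same stmt group.  */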

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
                         *leader ? "" : "failed ", *leader);
      if (*leader)
        {
          (*leader)->refcnt++;
          vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
        }
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
                                        &this_max_nunits,
                                        matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (stmt_info != other_info)
              return NULL;
        }
      else if (def_type == vect_reduction_def
               || def_type == vect_double_reduction_def
               || def_type == vect_nested_cycle)
        {
          /* Else def types have to match.  */
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
              return NULL;
        }
      else
        return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
        {
          /* Masked load.  */
          gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
          nops = 1;
        }
      else
        {
          *max_nunits = this_max_nunits;
          (*tree_size)++;
          node = vect_create_new_slp_node (stmts);
          /* And compute the load permutation.  Whether it is actually
             a permutation depends on the unrolling factor which is
             decided later.  */
          vec<unsigned> load_permutation;
          int j;
          stmt_vec_info load_info;
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place = vect_get_place_in_interleaving_chain
                  (load_info, first_stmt_info);
              gcc_assert (load_place != -1);
              load_permutation.safe_push (load_place);
            }
          SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
          return node;
        }
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
        {
          /* COND_EXPRs end up with one operand info too many if the
             condition is an SSA name rather than an embedded comparison;
             that slot stays uninitialized.  */
          gcc_assert (i == 3 && nops == 4);
          continue;
        }

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        {
          slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
          SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
          oprnd_info->ops = vNULL;
          children.safe_push (invnode);
          continue;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
          /* If we have all children of a non-unary child built up from
             scalars then just throw that away and build this node up
             from scalars.  */
          if (is_a <bb_vec_info> (vinfo)
              && SLP_TREE_CHILDREN (child).length () > 1
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !oprnd_info->any_pattern)
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
                  break;
              if (!grandchild
                  && vect_update_all_shared_vectypes (vinfo,
                                                      oprnd_info->def_stmts))
                {
                  /* Roll back.  */
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Building parent vector operands from "
                                     "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
                  oprnd_info->ops = vNULL;
                  ++this_tree_size;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info)
          && !oprnd_info->any_pattern
          && vect_update_all_shared_vectypes (vinfo, oprnd_info->def_stmts))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Building vector operands from scalars\n");
          this_tree_size++;
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
          children.safe_push (child);
          oprnd_info->ops = vNULL;
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt_info->stmt)
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would otherwise expose
             exponential behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
                  if (!stmt
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                std::swap (oprnds_info[0]->ops[j],
                           oprnds_info[1]->ops[j]);
                if (dump_enabled_p ())
                  dump_printf (MSG_NOTE, "%d ", j);
              }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
              /* If we have all children of a non-unary child built up from
                 scalars then just throw that away and build this node up
                 from scalars.  */
              if (is_a <bb_vec_info> (vinfo)
                  && SLP_TREE_CHILDREN (child).length () > 1
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !oprnd_info->any_pattern)
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
                      break;
                  if (!grandchild
                      && (vect_update_all_shared_vectypes
                            (vinfo, oprnd_info->def_stmts)))
                    {
                      /* Roll back.  */
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_NOTE, vect_location,
                                         "Building parent vector operands from "
                                         "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
                      oprnd_info->ops = vNULL;
                      ++this_tree_size;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node, hash_set<slp_tree> &visited)
{
  unsigned i, j;
  stmt_vec_info stmt_info;
  slp_tree child;
  tree op;

  if (visited.add (node))
    return;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
                   SLP_TREE_DEF_TYPE (node) == vect_external_def
                   ? " (external)"
                   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
                      ? " (constant)"
                      : ""), node,
                   estimated_poly_value (node->max_nunits), node->refcnt);
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
  else
    {
      dump_printf_loc (metadata, user_loc, "\t{ ");
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        dump_printf (metadata, "%T%s ", op,
                     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
      dump_printf (metadata, "}\n");
    }
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    {
      dump_printf_loc (metadata, user_loc, "\tload permutation {");
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
        dump_printf (dump_kind, " %u", j);
      dump_printf (dump_kind, " }\n");
    }
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child, visited);
}

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_print_slp_tree (dump_kind, loc, node, visited);
}

/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}

/* Copy the SLP subtree rooted at NODE.  */

static slp_tree
slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
{
  unsigned i;

  bool existed_p;
  slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
  if (existed_p)
    return copy_ref;

  copy_ref = XNEW (_slp_tree);
  slp_tree copy = copy_ref;
  memcpy (copy, node, sizeof (_slp_tree));
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    {
      SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        STMT_VINFO_NUM_SLP_USES (stmt_info)++;
    }
  if (SLP_TREE_SCALAR_OPS (node).exists ())
    SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
  if (SLP_TREE_CHILDREN (node).exists ())
    SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
  gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
  copy->refcnt = 0;
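  /* The fresh copy starts with no owners; each parent link created by
     the recursion below takes a reference, as does a caller that
     installs the copy as a new root.  */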
1745
1746 slp_tree child;
1747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1748 {
1749 SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1750 SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1751 }
1752 return copy;
1753 }
1754
1755 /* Rearrange the statements of NODE according to PERMUTATION. */
1756
1757 static void
1758 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1759 vec<unsigned> permutation,
1760 hash_set<slp_tree> &visited)
1761 {
1762 unsigned int i;
1763 slp_tree child;
1764
1765 if (visited.add (node))
1766 return;
1767
1768 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1769 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1770
1771 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1772 {
1773 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1774 /* ??? Computation nodes are isomorphic and need no rearrangement.
1775 This is a quick hack to cover those where rearrangement breaks
1776 semantics because only the first stmt is guaranteed to have the
1777 correct operation code due to others being swapped or inverted. */
1778 stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
1779 if (is_gimple_assign (first->stmt)
1780 && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
1781 return;
1782 vec<stmt_vec_info> tmp_stmts;
1783 tmp_stmts.create (group_size);
1784 tmp_stmts.quick_grow (group_size);
1785 stmt_vec_info stmt_info;
1786 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1787 tmp_stmts[permutation[i]] = stmt_info;
1788 SLP_TREE_SCALAR_STMTS (node).release ();
1789 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1790 }
1791 if (SLP_TREE_SCALAR_OPS (node).exists ())
1792 {
1793 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1794 vec<tree> tmp_ops;
1795 tmp_ops.create (group_size);
1796 tmp_ops.quick_grow (group_size);
1797 tree op;
1798 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1799 tmp_ops[permutation[i]] = op;
1800 SLP_TREE_SCALAR_OPS (node).release ();
1801 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1802 }
1803 }
1804
1805
1806 /* Attempt to reorder stmts in a reduction chain so that we don't
1807 require any load permutation. Return true if that was possible,
1808 otherwise return false. */
1809
1810 static bool
1811 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1812 {
1813 unsigned int i, j;
1814 unsigned int lidx;
1815 slp_tree node, load;
1816
1817 if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())
1818 return false;
1819
1820 /* Compare all the permutation sequences to the first one. We know
1821 that at least one load is permuted. */
1822 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1823 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1824 return false;
1825 unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
1826 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1827 {
1828 if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
1829 || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
1830 return false;
1831 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
1832 if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
1833 return false;
1834 }
1835
1836 /* Check that the loads in the first sequence are different and there
1837 are no gaps between them. */
1838 auto_sbitmap load_index (group_size);
1839 bitmap_clear (load_index);
1840 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1841 {
1842 if (lidx >= group_size)
1843 return false;
1844 if (bitmap_bit_p (load_index, lidx))
1845 return false;
1846
1847 bitmap_set_bit (load_index, lidx);
1848 }
1849 for (i = 0; i < group_size; i++)
1850 if (!bitmap_bit_p (load_index, i))
1851 return false;
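  /* For example, with GROUP_SIZE 4 the load permutation {2, 0, 3, 1}
     covers each index exactly once and is accepted, while {0, 0, 2, 3}
     is rejected by the duplicate check above.  */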
1852
1853 /* This permutation is valid for reduction. Since the order of the
1854 statements in the nodes is not important unless they are memory
1855 accesses, we can rearrange the statements in all the nodes
1856 according to the order of the loads. */
1857
1858 /* We have to unshare the SLP tree we modify. */
1859 hash_map<slp_tree, slp_tree> map;
1860 slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1861 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1862 unshared->refcnt++;
1863 SLP_INSTANCE_TREE (slp_instn) = unshared;
1864 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1865 SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1866 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1867
1868 /* Do the actual re-arrangement. */
1869 hash_set<slp_tree> visited;
1870 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1871 node->load_permutation, visited);
1872
1873 /* We are done, no actual permutations need to be generated. */
1874 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1875 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1876 {
1877 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1878 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1879 /* But we have to keep those permutations that are required because
1880 of handling of gaps. */
1881 if (known_eq (unrolling_factor, 1U)
1882 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1883 && DR_GROUP_GAP (first_stmt_info) == 0))
1884 SLP_TREE_LOAD_PERMUTATION (node).release ();
1885 else
1886 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1887 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1888 }
1889
1890 return true;
1891 }
1892
1893 /* Gather the grouped loads in the SLP graph rooted at NODE into LOADS. */
1894
1895 static void
1896 vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
1897 hash_set<slp_tree> &visited)
1898 {
1899 if (visited.add (node))
1900 return;
1901
1902 if (SLP_TREE_CHILDREN (node).length () == 0)
1903 {
1904 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1905 return;
1906 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1907 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1908 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1909 loads.safe_push (node);
1910 }
1911 else
1912 {
1913 unsigned i;
1914 slp_tree child;
1915 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1916 vect_gather_slp_loads (loads, child, visited);
1917 }
1918 }
1919
1920 static void
1921 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1922 {
1923 hash_set<slp_tree> visited;
1924 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
1925 }
1926
1927
1928 /* Find the last scalar statement in the SLP tree NODE. */
1929
1930 stmt_vec_info
1931 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1932 {
1933 stmt_vec_info last = NULL;
1934 stmt_vec_info stmt_vinfo;
1935
1936 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1937 {
1938 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1939 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1940 }
1941
1942 return last;
1943 }
1944
1945 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1946 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1947 (also containing the first GROUP1_SIZE stmts, since stores are
1948 consecutive), the second containing the remainder.
1949 Return the first stmt in the second group. */
1950
1951 static stmt_vec_info
1952 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1953 {
1954 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1955 gcc_assert (group1_size > 0);
1956 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1957 gcc_assert (group2_size > 0);
1958 DR_GROUP_SIZE (first_vinfo) = group1_size;
1959
1960 stmt_vec_info stmt_info = first_vinfo;
1961 for (unsigned i = group1_size; i > 1; i--)
1962 {
1963 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1964 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1965 }
1966 /* STMT_INFO is now the last element of the first group. */
1967 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1968 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1969
1970 DR_GROUP_SIZE (group2) = group2_size;
1971 for (stmt_info = group2; stmt_info;
1972 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1973 {
1974 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1975 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1976 }
1977
1978 /* For the second group, the DR_GROUP_GAP is that before the original group,
1979 plus skipping over the first group. */
1980 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1981
1982 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1983 DR_GROUP_GAP (first_vinfo) += group2_size;
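  /* For example, splitting a gap-free group of eight stores with
     GROUP1_SIZE 4 yields two groups of four, each with DR_GROUP_GAP 4
     so that each group also steps over the other group's elements.  */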
1984
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1987 group1_size, group2_size);
1988
1989 return group2;
1990 }
1991
1992 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1993 statements and a vector of NUNITS elements. */
1994
1995 static poly_uint64
1996 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1997 {
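  /* For example, with NUNITS 4 and GROUP_SIZE 6 the common multiple is
     12 and the unrolling factor is 2: two copies of the group fill
     three vectors exactly.  */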
1998 return exact_div (common_multiple (nunits, group_size), group_size);
1999 }
2000
2001 /* Analyze an SLP instance starting from a group of grouped stores, a
2002 reduction chain/group or a vector constructor. Call vect_build_slp_tree
2003 to build a tree of packed stmts if possible. Return FALSE if it is not. */
2004
2005 static bool
2006 vect_analyze_slp_instance (vec_info *vinfo,
2007 scalar_stmts_to_slp_tree_map_t *bst_map,
2008 stmt_vec_info stmt_info, unsigned max_tree_size)
2009 {
2010 slp_instance new_instance;
2011 slp_tree node;
2012 unsigned int group_size;
2013 tree vectype, scalar_type = NULL_TREE;
2014 unsigned int i;
2015 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2016 vec<stmt_vec_info> scalar_stmts;
2017 bool constructor = false;
2018
2019 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2020 {
2021 scalar_type = TREE_TYPE (DR_REF (dr));
2022 group_size = DR_GROUP_SIZE (stmt_info);
2023 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2024 }
2025 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2026 {
2027 gcc_assert (is_a <loop_vec_info> (vinfo));
2028 vectype = STMT_VINFO_VECTYPE (stmt_info);
2029 group_size = REDUC_GROUP_SIZE (stmt_info);
2030 }
2031 else if (is_gimple_assign (stmt_info->stmt)
2032 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2033 {
2034 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2035 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2036 constructor = true;
2037 }
2038 else
2039 {
2040 gcc_assert (is_a <loop_vec_info> (vinfo));
2041 vectype = STMT_VINFO_VECTYPE (stmt_info);
2042 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2043 }
2044
2045 if (!vectype)
2046 {
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049 "Build SLP failed: unsupported data-type %T\n",
2050 scalar_type);
2051
2052 return false;
2053 }
2054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2055
2056 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2057 scalar_stmts.create (group_size);
2058 stmt_vec_info next_info = stmt_info;
2059 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2060 {
2061 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2062 while (next_info)
2063 {
2064 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2065 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2066 }
2067 }
2068 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2069 {
2070 /* Collect the reduction stmts and store them in
2071 SLP_TREE_SCALAR_STMTS. */
2072 while (next_info)
2073 {
2074 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2075 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2076 }
2077 /* Mark the first element of the reduction chain as reduction to properly
2078 transform the node. In the reduction analysis phase only the last
2079 element of the chain is marked as reduction. */
2080 STMT_VINFO_DEF_TYPE (stmt_info)
2081 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2082 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2083 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2084 }
2085 else if (constructor)
2086 {
2087 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2088 tree val;
2089 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2090 {
2091 if (TREE_CODE (val) == SSA_NAME)
2092 {
2093 gimple* def = SSA_NAME_DEF_STMT (val);
2094 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2095 /* Value is defined in another basic block. */
2096 if (!def_info)
2097 return false;
2098 def_info = vect_stmt_to_vectorize (def_info);
2099 scalar_stmts.safe_push (def_info);
2100 }
2101 else
2102 return false;
2103 }
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE, vect_location,
2106 "Analyzing vectorizable constructor: %G\n",
2107 stmt_info->stmt);
2108 }
2109 else
2110 {
2111 /* Collect reduction statements. */
2112 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2113 for (i = 0; reductions.iterate (i, &next_info); i++)
2114 scalar_stmts.safe_push (next_info);
2115 }
2116
2117 /* Build the tree for the SLP instance. */
2118 bool *matches = XALLOCAVEC (bool, group_size);
2119 unsigned npermutes = 0;
2120 poly_uint64 max_nunits = nunits;
2121 unsigned tree_size = 0;
2122 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2123 &max_nunits, matches, &npermutes,
2124 &tree_size, bst_map);
2125 if (node != NULL)
2126 {
2127 /* Calculate the unrolling factor based on the smallest type. */
2128 poly_uint64 unrolling_factor
2129 = calculate_unrolling_factor (max_nunits, group_size);
2130
2131 if (maybe_ne (unrolling_factor, 1U)
2132 && is_a <bb_vec_info> (vinfo))
2133 {
2134 unsigned HOST_WIDE_INT const_max_nunits;
2135 if (!max_nunits.is_constant (&const_max_nunits)
2136 || const_max_nunits > group_size)
2137 {
2138 if (dump_enabled_p ())
2139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2140 "Build SLP failed: store group "
2141 "size not a multiple of the vector size "
2142 "in basic block SLP\n");
2143 vect_free_slp_tree (node, false);
2144 return false;
2145 }
2146 /* Fatal mismatch: record it at the last whole-vector boundary for the splitting code below. */
2147 matches[group_size / const_max_nunits * const_max_nunits] = false;
2148 vect_free_slp_tree (node, false);
2149 }
2150 else
2151 {
2152 /* Create a new SLP instance. */
2153 new_instance = XNEW (class _slp_instance);
2154 SLP_INSTANCE_TREE (new_instance) = node;
2155 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2156 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2157 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2158
2159 vect_gather_slp_loads (new_instance, node);
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_NOTE, vect_location,
2162 "SLP size %u vs. limit %u.\n",
2163 tree_size, max_tree_size);
2164
2165 /* Check whether any load is possibly permuted. */
2166 slp_tree load_node;
2167 bool loads_permuted = false;
2168 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2169 {
2170 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2171 continue;
2172 unsigned j;
2173 stmt_vec_info load_info;
2174 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2175 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2176 {
2177 loads_permuted = true;
2178 break;
2179 }
2180 }
2181
2182 /* If the loads and stores can be handled with load/store-lane
2183 instructions, do not generate this SLP instance. */
2184 if (is_a <loop_vec_info> (vinfo)
2185 && loads_permuted
2186 && dr && vect_store_lanes_supported (vectype, group_size, false))
2187 {
2188 slp_tree load_node;
2189 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2190 {
2191 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2192 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2193 /* Use SLP for strided accesses (or if we can't load-lanes). */
2194 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2195 || ! vect_load_lanes_supported
2196 (STMT_VINFO_VECTYPE (stmt_vinfo),
2197 DR_GROUP_SIZE (stmt_vinfo), false))
2198 break;
2199 }
2200 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2201 {
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 "Built SLP cancelled: can use "
2205 "load/store-lanes\n");
2206 vect_free_slp_instance (new_instance, false);
2207 return false;
2208 }
2209 }
2210
2211 /* If this is a reduction chain with a conversion in front
2212 amend the SLP tree with a node for that. */
2213 if (!dr
2214 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2215 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2216 {
2217 /* Get at the conversion stmt - we know it's the single use
2218 of the last stmt of the reduction chain. */
2219 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2220 use_operand_p use_p;
2221 gimple *use_stmt;
2222 bool r = single_imm_use (gimple_assign_lhs (tem),
2223 &use_p, &use_stmt);
2224 gcc_assert (r);
2225 next_info = vinfo->lookup_stmt (use_stmt);
2226 next_info = vect_stmt_to_vectorize (next_info);
2227 scalar_stmts = vNULL;
2228 scalar_stmts.create (group_size);
2229 for (unsigned i = 0; i < group_size; ++i)
2230 scalar_stmts.quick_push (next_info);
2231 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2232 SLP_TREE_CHILDREN (conv).quick_push (node);
2233 SLP_INSTANCE_TREE (new_instance) = conv;
2234 /* We also have to fake this conversion stmt as SLP reduction
2235 group so we don't have to mess with too much code
2236 elsewhere. */
2237 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2238 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2239 }
2240
2241 vinfo->slp_instances.safe_push (new_instance);
2242
2243 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2244 the number of scalar stmts in the root in a few places.
2245 Verify that assumption holds. */
2246 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
2247 .length () == group_size);
2248
2249 if (dump_enabled_p ())
2250 {
2251 dump_printf_loc (MSG_NOTE, vect_location,
2252 "Final SLP tree for instance:\n");
2253 vect_print_slp_tree (MSG_NOTE, vect_location,
2254 SLP_INSTANCE_TREE (new_instance));
2255 }
2256
2257 return true;
2258 }
2259 }
2260 else
2261 {
2262 /* Failed to SLP. */
2263 /* Free the allocated memory. */
2264 scalar_stmts.release ();
2265 }
2266
2267 /* For basic block SLP, try to break the group up into multiples of the
2268 vector size. */
2269 unsigned HOST_WIDE_INT const_nunits;
2270 if (is_a <bb_vec_info> (vinfo)
2271 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2272 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2273 && nunits.is_constant (&const_nunits))
2274 {
2275 /* We consider breaking the group only on VF boundaries from the existing
2276 start. */
2277 for (i = 0; i < group_size; i++)
2278 if (!matches[i]) break;
2279
2280 if (i >= const_nunits && i < group_size)
2281 {
2282 /* Split into two groups at the first vector boundary before i. */
2283 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2284 unsigned group1_size = i & ~(const_nunits - 1);
2285
2286 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2287 group1_size);
2288 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2289 max_tree_size);
2290 /* If the first non-match was in the middle of a vector,
2291 skip the rest of that vector. */
2292 if (group1_size < i)
2293 {
2294 i = group1_size + const_nunits;
2295 if (i < group_size)
2296 rest = vect_split_slp_store_group (rest, const_nunits);
2297 }
2298 if (i < group_size)
2299 res |= vect_analyze_slp_instance (vinfo, bst_map,
2300 rest, max_tree_size);
2301 return res;
2302 }
2303 /* Even though the first vector did not all match, we might be able to SLP
2304 (some) of the remainder. FORNOW ignore this possibility. */
2305 }
2306
2307 return false;
2308 }
2309
2310
2311 /* Check if there are stmts in the loop that can be vectorized using SLP.
2312 Build SLP trees of packed scalar stmts if SLP is possible. */
2313
2314 opt_result
2315 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2316 {
2317 unsigned int i;
2318 stmt_vec_info first_element;
2319
2320 DUMP_VECT_SCOPE ("vect_analyze_slp");
2321
2322 scalar_stmts_to_slp_tree_map_t *bst_map
2323 = new scalar_stmts_to_slp_tree_map_t ();
2324
2325 /* Find SLP sequences starting from groups of grouped stores. */
2326 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2327 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2328
2329 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2330 {
2331 if (loop_vinfo->reduction_chains.length () > 0)
2332 {
2333 /* Find SLP sequences starting from reduction chains. */
2334 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2335 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2336 max_tree_size))
2337 {
2338 /* Dissolve reduction chain group. */
2339 stmt_vec_info vinfo = first_element;
2340 stmt_vec_info last = NULL;
2341 while (vinfo)
2342 {
2343 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2344 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2345 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2346 last = vinfo;
2347 vinfo = next;
2348 }
2349 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2350 /* It can be still vectorized as part of an SLP reduction. */
2351 loop_vinfo->reductions.safe_push (last);
2352 }
2353 }
2354
2355 /* Find SLP sequences starting from groups of reductions. */
2356 if (loop_vinfo->reductions.length () > 1)
2357 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2358 max_tree_size);
2359 }
2360
2361 /* The map keeps a reference on SLP nodes built, release that. */
2362 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2363 it != bst_map->end (); ++it)
2364 if ((*it).second)
2365 vect_free_slp_tree ((*it).second, false);
2366 delete bst_map;
2367
2368 /* Optimize permutations in SLP reductions. */
2369 slp_instance instance;
2370 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2371 {
2372 slp_tree node = SLP_INSTANCE_TREE (instance);
2373 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2374 /* Reduction (there are no data-refs in the root).
2375 In a reduction chain the order of the loads is not important. */
2376 if (!STMT_VINFO_DATA_REF (stmt_info)
2377 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2378 vect_attempt_slp_rearrange_stmts (instance);
2379 }
2380
2381 /* Gather all loads in the SLP graph. */
2382 hash_set<slp_tree> visited;
2383 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2384 vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
2385 visited);
2386
2387 return opt_result::success ();
2388 }
2389
2390 void
2391 vect_optimize_slp (vec_info *vinfo)
2392 {
2393 slp_tree node;
2394 unsigned i;
2395 FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
2396 {
2397 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
2398 continue;
2399
2400 /* In basic block vectorization we allow any subchain of an interleaving
2401 chain.
2402 FORNOW: not in loop SLP because of realignment complications. */
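  /* For example, a node whose loads are elements 2 and 3 of a
     four-element interleaving group, in that order, forms a subchain
     and needs no permutation.  */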
2403 if (is_a <bb_vec_info> (vinfo))
2404 {
2405 bool subchain_p = true;
2406 stmt_vec_info next_load_info = NULL;
2407 stmt_vec_info load_info;
2408 unsigned j;
2409 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2410 {
2411 if (j != 0
2412 && (next_load_info != load_info
2413 || DR_GROUP_GAP (load_info) != 1))
2414 {
2415 subchain_p = false;
2416 break;
2417 }
2418 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
2419 }
2420 if (subchain_p)
2421 {
2422 SLP_TREE_LOAD_PERMUTATION (node).release ();
2423 continue;
2424 }
2425 }
2426 else
2427 {
2428 stmt_vec_info load_info;
2429 bool this_load_permuted = false;
2430 unsigned j;
2431 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2432 if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
2433 {
2434 this_load_permuted = true;
2435 break;
2436 }
2437 stmt_vec_info first_stmt_info
2438 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
2439 if (!this_load_permuted
2440 /* The load requires permutation when unrolling exposes
2441 a gap either because the group is larger than the SLP
2442 group-size or because there is a gap between the groups. */
2443 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
2444 || ((SLP_TREE_SCALAR_STMTS (node).length ()
2445 == DR_GROUP_SIZE (first_stmt_info))
2446 && DR_GROUP_GAP (first_stmt_info) == 0)))
2447 {
2448 SLP_TREE_LOAD_PERMUTATION (node).release ();
2449 continue;
2450 }
2451 }
2452 }
2453 }
2454
2455
2456 /* For each possible SLP instance decide whether to SLP it and calculate the
2457 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2458 to SLP at least one instance. */
2459
2460 bool
2461 vect_make_slp_decision (loop_vec_info loop_vinfo)
2462 {
2463 unsigned int i;
2464 poly_uint64 unrolling_factor = 1;
2465 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2466 slp_instance instance;
2467 int decided_to_slp = 0;
2468
2469 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2470
2471 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2472 {
2473 /* FORNOW: SLP if you can. */
2474 /* All unroll factors have the form:
2475
2476 GET_MODE_SIZE (vinfo->vector_mode) * X
2477
2478 for some rational X, so they must have a common multiple. */
2479 unrolling_factor
2480 = force_common_multiple (unrolling_factor,
2481 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2482
2483 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2484 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2485 loop-based vectorization. Such stmts will be marked as HYBRID. */
2486 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2487 decided_to_slp++;
2488 }
2489
2490 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2491
2492 if (decided_to_slp && dump_enabled_p ())
2493 {
2494 dump_printf_loc (MSG_NOTE, vect_location,
2495 "Decided to SLP %d instances. Unrolling factor ",
2496 decided_to_slp);
2497 dump_dec (MSG_NOTE, unrolling_factor);
2498 dump_printf (MSG_NOTE, "\n");
2499 }
2500
2501 return (decided_to_slp > 0);
2502 }
2503
2504 /* Private data for vect_detect_hybrid_slp. */
2505 struct vdhs_data
2506 {
2507 loop_vec_info loop_vinfo;
2508 vec<stmt_vec_info> *worklist;
2509 };
2510
2511 /* Walker for walk_gimple_op. */
2512
2513 static tree
2514 vect_detect_hybrid_slp (tree *tp, int *, void *data)
2515 {
2516 walk_stmt_info *wi = (walk_stmt_info *)data;
2517 vdhs_data *dat = (vdhs_data *)wi->info;
2518
2519 if (wi->is_lhs)
2520 return NULL_TREE;
2521
2522 stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
2523 if (!def_stmt_info)
2524 return NULL_TREE;
2525 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
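      /* The use we walked from is not pure SLP, so a pure-SLP def
         feeding it must also be emitted by the loop-based code.  Mark it
         hybrid and queue it so that its own defs are visited too.  */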
2526 if (PURE_SLP_STMT (def_stmt_info))
2527 {
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2530 def_stmt_info->stmt);
2531 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2532 dat->worklist->safe_push (def_stmt_info);
2533 }
2534
2535 return NULL_TREE;
2536 }
2537
2538 /* Find stmts that must be both vectorized and SLPed. */
2539
2540 void
2541 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2542 {
2543 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2544
2545 /* All stmts participating in SLP are marked pure_slp, all other
2546 stmts are loop_vect.
2547 First collect all loop_vect stmts into a worklist. */
2548 auto_vec<stmt_vec_info> worklist;
2549 for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2550 {
2551 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2552 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
2553 gsi_next (&gsi))
2554 {
2555 gphi *phi = gsi.phi ();
2556 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
2557 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2558 worklist.safe_push (stmt_info);
2559 }
2560 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2561 gsi_next (&gsi))
2562 {
2563 gimple *stmt = gsi_stmt (gsi);
2564 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2565 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2566 {
2567 for (gimple_stmt_iterator gsi2
2568 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2569 !gsi_end_p (gsi2); gsi_next (&gsi2))
2570 {
2571 stmt_vec_info patt_info
2572 = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
2573 if (!STMT_SLP_TYPE (patt_info)
2574 && STMT_VINFO_RELEVANT (patt_info))
2575 worklist.safe_push (patt_info);
2576 }
2577 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
2578 }
2579 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2580 worklist.safe_push (stmt_info);
2581 }
2582 }
2583
2584 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2585 mark any SLP vectorized stmt as hybrid.
2586 ??? We're visiting def stmts N times (once for each non-SLP and
2587 once for each hybrid-SLP use). */
2588 walk_stmt_info wi;
2589 vdhs_data dat;
2590 dat.worklist = &worklist;
2591 dat.loop_vinfo = loop_vinfo;
2592 memset (&wi, 0, sizeof (wi));
2593 wi.info = (void *)&dat;
2594 while (!worklist.is_empty ())
2595 {
2596 stmt_vec_info stmt_info = worklist.pop ();
2597 /* Since SSA operands are not set up for pattern stmts we need
2598 to use walk_gimple_op. */
2599 wi.is_lhs = 0;
2600 walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
2601 }
2602 }
2603
2604
2605 /* Initialize a bb_vec_info struct for the statements between
2606 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2607
2608 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2609 gimple_stmt_iterator region_end_in,
2610 vec_info_shared *shared)
2611 : vec_info (vec_info::bb, init_cost (NULL), shared),
2612 bb (gsi_bb (region_begin_in)),
2613 region_begin (region_begin_in),
2614 region_end (region_end_in)
2615 {
2616 gimple_stmt_iterator gsi;
2617
2618 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2619 gsi_next (&gsi))
2620 {
2621 gimple *stmt = gsi_stmt (gsi);
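      /* A UID of zero marks the stmt as belonging to this region; the
         destructor resets it to -1 again.  */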
2622 gimple_set_uid (stmt, 0);
2623 add_stmt (stmt);
2624 }
2625
2626 bb->aux = this;
2627 }
2628
2629
2630 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2631 stmts in the basic block. */
2632
2633 _bb_vec_info::~_bb_vec_info ()
2634 {
2635 for (gimple_stmt_iterator si = region_begin;
2636 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2637 /* Reset region marker. */
2638 gimple_set_uid (gsi_stmt (si), -1);
2639
2640 bb->aux = NULL;
2641 }
2642
2643 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2644 given that child nodes have already been processed, and that
2645 their def types currently match their SLP node's def type. */
2646
2647 static bool
2648 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2649 slp_instance node_instance,
2650 stmt_vector_for_cost *cost_vec)
2651 {
2652 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2653 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2654
2655 /* Calculate the number of vector statements to be created for the
2656 scalar stmts in this node. For SLP reductions it is equal to the
2657 number of vector statements in the children (which has already been
2658 calculated by the recursive call). Otherwise it is the number of
2659 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2660 VF divided by the number of elements in a vector. */
2661 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2662 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2663 {
2664 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2665 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2666 {
2667 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2668 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2669 break;
2670 }
2671 }
2672 else
2673 {
2674 poly_uint64 vf;
2675 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2676 vf = loop_vinfo->vectorization_factor;
2677 else
2678 vf = 1;
2679 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2680 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2681 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2682 = vect_get_num_vectors (vf * group_size, vectype);
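      /* For example, with VF = 4, a group size of 2 and 4-element
         vectors this yields (4 * 2) / 4 = 2 vector statements.  */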
2683 }
2684
2685 bool dummy;
2686 return vect_analyze_stmt (vinfo, stmt_info, &dummy,
2687 node, node_instance, cost_vec);
2688 }
2689
2690 /* Try to build NODE from scalars, returning true on success.
2691 NODE_INSTANCE is the SLP instance that contains NODE. */
2692
2693 static bool
2694 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2695 slp_instance node_instance)
2696 {
2697 stmt_vec_info stmt_info;
2698 unsigned int i;
2699
2700 if (!is_a <bb_vec_info> (vinfo)
2701 || node == SLP_INSTANCE_TREE (node_instance)
2702 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2703 return false;
2704
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_NOTE, vect_location,
2707 "Building vector operands from scalars instead\n");
2708
2709 /* Don't remove and free the child nodes here, since they could be
2710 referenced by other structures. The analysis and scheduling phases
2711 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2712 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2713 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2714 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2715 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2716 {
2717 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2718 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2719 }
2720 return true;
2721 }
2722
2723 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2724 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2725
2726 Return true if the operations are supported. */
2727
2728 static bool
2729 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2730 slp_instance node_instance,
2731 hash_set<slp_tree> &visited,
2732 hash_set<slp_tree> &lvisited,
2733 stmt_vector_for_cost *cost_vec)
2734 {
2735 int i, j;
2736 slp_tree child;
2737
2738 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2739 return true;
2740
2741 /* If we already analyzed the exact same set of scalar stmts we're done.
2742 We share the generated vector stmts for those.
2743 The SLP graph is acyclic so not caching whether we failed or succeeded
2744 doesn't result in any issue since we throw away the lvisited set
2745 when we fail. */
2746 if (visited.contains (node)
2747 || lvisited.add (node))
2748 return true;
2749
2750 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2751 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2752 visited, lvisited, cost_vec))
2753 return false;
2754
2755 /* ??? We have to catch late the case where the same first scalar stmt
2756 appears in multiple SLP children with different def types, and fail.
2757 Remember the original def types first since SLP_TREE_DEF_TYPE doesn't
2758 necessarily match them when it is vect_internal_def. */
2759 auto_vec<vect_def_type, 4> dt;
2760 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2762 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2763 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2764
2765 /* Push SLP node def-type to stmt operands. */
2766 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2767 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2768 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2769 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2770 = SLP_TREE_DEF_TYPE (child);
2771
2772 /* Check everything worked out. */
2773 bool res = true;
2774 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2775 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2776 {
2777 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2778 {
2779 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2780 != SLP_TREE_DEF_TYPE (child))
2781 res = false;
2782 }
2783 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2784 != dt[j])
2785 res = false;
2786 }
2787 if (!res && dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2789 "not vectorized: same operand with different "
2790 "def type in stmt.\n");
2791
2792 if (res)
2793 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2794 cost_vec);
2795
2796 /* Restore def-types. */
2797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2798 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2799 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2800
2801 /* If this node can't be vectorized, try pruning the tree here rather
2802 than felling the whole thing. */
2803 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2804 res = true;
2805
2806 return res;
2807 }
2808
2809
2810 /* Analyze statements in SLP instances of VINFO. Return true if the
2811 operations are supported. */
2812
2813 bool
2814 vect_slp_analyze_operations (vec_info *vinfo)
2815 {
2816 slp_instance instance;
2817 int i;
2818
2819 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2820
2821 hash_set<slp_tree> visited;
2822 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2823 {
2824 hash_set<slp_tree> lvisited;
2825 stmt_vector_for_cost cost_vec;
2826 cost_vec.create (2);
2827 if (!vect_slp_analyze_node_operations (vinfo,
2828 SLP_INSTANCE_TREE (instance),
2829 instance, visited, lvisited,
2830 &cost_vec)
2831 /* Instances with a root stmt require vectorized defs for the
2832 SLP tree root. */
2833 || (SLP_INSTANCE_ROOT_STMT (instance)
2834 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2835 != vect_internal_def)))
2836 {
2837 slp_tree node = SLP_INSTANCE_TREE (instance);
2838 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2839 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_NOTE, vect_location,
2841 "removing SLP instance operations starting from: %G",
2842 stmt_info->stmt);
2843 vect_free_slp_instance (instance, false);
2844 vinfo->slp_instances.ordered_remove (i);
2845 cost_vec.release ();
2846 }
2847 else
2848 {
2849 for (hash_set<slp_tree>::iterator x = lvisited.begin();
2850 x != lvisited.end(); ++x)
2851 visited.add (*x);
2852 i++;
2853
2854 add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
2855 cost_vec.release ();
2856 }
2857 }
2858
2859 return !vinfo->slp_instances.is_empty ();
2860 }
2861
2862
2863 /* Compute the scalar cost of the SLP node NODE and its children and
2864 record it in COST_VEC. Do not account for defs that are marked in
2865 LIFE; update LIFE according to the uses of NODE. */
2866
2867 static void
2868 vect_bb_slp_scalar_cost (vec_info *vinfo,
2869 slp_tree node, vec<bool, va_heap> *life,
2870 stmt_vector_for_cost *cost_vec,
2871 hash_set<slp_tree> &visited)
2872 {
2873 unsigned i;
2874 stmt_vec_info stmt_info;
2875 slp_tree child;
2876
2877 if (visited.add (node))
2878 return;
2879
2880 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2881 {
2882 gimple *stmt = stmt_info->stmt;
2883 ssa_op_iter op_iter;
2884 def_operand_p def_p;
2885
2886 if ((*life)[i])
2887 continue;
2888
2889 /* If there is a non-vectorized use of the defs then the scalar
2890 stmt is kept live, in which case we do not account it or any
2891 required defs in the SLP children in the scalar cost. This
2892 makes the vectorization more costly when compared to the
2893 scalar cost. */
2894 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2895 {
2896 imm_use_iterator use_iter;
2897 gimple *use_stmt;
2898 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2899 if (!is_gimple_debug (use_stmt))
2900 {
2901 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2902 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2903 {
2904 (*life)[i] = true;
2905 BREAK_FROM_IMM_USE_STMT (use_iter);
2906 }
2907 }
2908 }
2909 if ((*life)[i])
2910 continue;
2911
2912 /* Count scalar stmts only once. */
2913 if (gimple_visited_p (stmt))
2914 continue;
2915 gimple_set_visited (stmt, true);
2916
2917 vect_cost_for_stmt kind;
2918 if (STMT_VINFO_DATA_REF (stmt_info))
2919 {
2920 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2921 kind = scalar_load;
2922 else
2923 kind = scalar_store;
2924 }
2925 else if (vect_nop_conversion_p (stmt_info))
2926 continue;
2927 else
2928 kind = scalar_stmt;
2929 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2930 }
2931
2932 auto_vec<bool, 20> subtree_life;
2933 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2934 {
2935 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2936 {
2937 /* Do not directly pass LIFE to the recursive call, copy it to
2938 confine changes in the callee to the current child/subtree. */
2939 subtree_life.safe_splice (*life);
2940 vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
2941 visited);
2942 subtree_life.truncate (0);
2943 }
2944 }
2945 }
2946
2947 /* Check if vectorization of the basic block is profitable. */
2948
2949 static bool
2950 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2951 {
2952 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2953 slp_instance instance;
2954 int i;
2955 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2956 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2957
2958 /* Calculate scalar cost. */
2959 stmt_vector_for_cost scalar_costs;
2960 scalar_costs.create (0);
2961 hash_set<slp_tree> visited;
2962 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2963 {
2964 auto_vec<bool, 20> life;
2965 life.safe_grow_cleared
2966 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ());
2967 vect_bb_slp_scalar_cost (bb_vinfo,
2968 SLP_INSTANCE_TREE (instance),
2969 &life, &scalar_costs, visited);
2970 }
2971 void *target_cost_data = init_cost (NULL);
2972 add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
2973 scalar_costs.release ();
2974 unsigned dummy;
2975 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2976 destroy_cost_data (target_cost_data);
2977
2978 /* Unset visited flag. */
2979 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2980 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2981 gimple_set_visited (gsi_stmt (gsi), false);
2982
2983 /* Complete the target-specific cost calculation. */
2984 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2985 &vec_inside_cost, &vec_epilogue_cost);
2986
2987 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2988
2989 if (dump_enabled_p ())
2990 {
2991 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2992 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2993 vec_inside_cost);
2994 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2995 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2996 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2997 }
2998
2999 /* Vectorization is profitable if its cost is no more than the cost of the
3000 scalar version. Note that we err on the vector side for equal cost
3001 because the cost estimate is otherwise quite pessimistic (constant uses
3002 are free on the scalar side but cost a load on the vector side, for
3003 example). */
3004 if (vec_outside_cost + vec_inside_cost > scalar_cost)
3005 return false;
3006
3007 return true;
3008 }
3009
3010 /* Find any vectorizable constructors and add them to the grouped_stores
3011 array. */
3012
3013 static void
3014 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3015 {
3016 gimple_stmt_iterator gsi;
3017
3018 for (gsi = bb_vinfo->region_begin;
3019 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3020 {
3021 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3022 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3023 continue;
3024
3025 tree rhs = gimple_assign_rhs1 (stmt);
3026 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3027 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3028 CONSTRUCTOR_NELTS (rhs))
3029 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3030 || uniform_vector_p (rhs))
3031 continue;
3032
3033 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3034 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3035 }
3036 }
3037
3038 /* Check if the region described by BB_VINFO can be vectorized, returning
3039 true if so. When returning false, set FATAL to true if the same failure
3040 would prevent vectorization at other vector sizes, false if it is still
3041 worth trying other sizes. N_STMTS is the number of statements in the
3042 region. */
3043
3044 static bool
3045 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3046 {
3047 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3048
3049 slp_instance instance;
3050 int i;
3051 poly_uint64 min_vf = 2;
3052
3053 /* The first group of checks is independent of the vector size. */
3054 fatal = true;
3055
3056 /* Analyze the data references. */
3057
3058 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3059 {
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3062 "not vectorized: unhandled data-ref in basic "
3063 "block.\n");
3064 return false;
3065 }
3066
3067 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3068 {
3069 if (dump_enabled_p ())
3070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3071 "not vectorized: not enough data-refs in "
3072 "basic block.\n");
3073 return false;
3074 }
3075
3076 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3077 {
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080 "not vectorized: unhandled data access in "
3081 "basic block.\n");
3082 return false;
3083 }
3084
3085 vect_slp_check_for_constructors (bb_vinfo);
3086
3087 /* If there are no grouped stores in the region there is no need
3088 to continue with pattern recog as vect_analyze_slp will fail
3089 anyway. */
3090 if (bb_vinfo->grouped_stores.is_empty ())
3091 {
3092 if (dump_enabled_p ())
3093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3094 "not vectorized: no grouped stores in "
3095 "basic block.\n");
3096 return false;
3097 }
3098
3099 /* The rest of the analysis below depends on the vector size, so later failures are not fatal. */
3100 fatal = false;
3101
3102 vect_pattern_recog (bb_vinfo);
3103
3104 /* Check the SLP opportunities in the basic block, analyze and build SLP
3105 trees. */
3106 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3107 {
3108 if (dump_enabled_p ())
3109 {
3110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3111 "Failed to SLP the basic block.\n");
3112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3113 "not vectorized: failed to find SLP opportunities "
3114 "in basic block.\n");
3115 }
3116 return false;
3117 }
3118
3119 /* Optimize permutations. */
3120 vect_optimize_slp (bb_vinfo);
3121
3122 vect_record_base_alignments (bb_vinfo);
3123
3124 /* Analyze and verify the alignment of data references and the
3125 dependence in the SLP instances. */
3126 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3127 {
3128 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo, instance)
3129 || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
3130 {
3131 slp_tree node = SLP_INSTANCE_TREE (instance);
3132 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3133 if (dump_enabled_p ())
3134 dump_printf_loc (MSG_NOTE, vect_location,
3135 "removing SLP instance operations starting from: %G",
3136 stmt_info->stmt);
3137 vect_free_slp_instance (instance, false);
3138 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3139 continue;
3140 }
3141
3142 /* Mark all the statements that we want to vectorize as pure SLP and
3143 relevant. */
3144 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3145 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3146 if (SLP_INSTANCE_ROOT_STMT (instance))
3147 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3148
3149 i++;
3150 }
3151 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3152 return false;
3153
3154 if (!vect_slp_analyze_operations (bb_vinfo))
3155 {
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3158 "not vectorized: bad operation in basic block.\n");
3159 return false;
3160 }
3161
3162 /* Cost model: check if the vectorization is worthwhile. */
3163 if (!unlimited_cost_model (NULL)
3164 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3165 {
3166 if (dump_enabled_p ())
3167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3168 "not vectorized: vectorization is not "
3169 "profitable.\n");
3170 return false;
3171 }
3172
3173 if (dump_enabled_p ())
3174 dump_printf_loc (MSG_NOTE, vect_location,
3175 "Basic block will be vectorized using SLP\n");
3176 return true;
3177 }
3178
3179 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3180 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3181 on success. The region has N_STMTS statements and has the datarefs
3182 given by DATAREFS. */
3183
3184 static bool
3185 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3186 gimple_stmt_iterator region_end,
3187 vec<data_reference_p> datarefs,
3188 unsigned int n_stmts)
3189 {
3190 bb_vec_info bb_vinfo;
3191 auto_vector_modes vector_modes;
3192
3193 /* Autodetect the first vector mode we try. */
3194 machine_mode next_vector_mode = VOIDmode;
3195 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3196 unsigned int mode_i = 0;
3197
3198 vec_info_shared shared;
3199
3200 machine_mode autodetected_vector_mode = VOIDmode;
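  /* Re-analyze the region once per candidate vector mode until one
     succeeds, the modes are exhausted, or a size-independent (fatal)
     failure is seen.  */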
3201 while (1)
3202 {
3203 bool vectorized = false;
3204 bool fatal = false;
3205 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3206
3207 bool first_time_p = shared.datarefs.is_empty ();
3208 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3209 if (first_time_p)
3210 bb_vinfo->shared->save_datarefs ();
3211 else
3212 bb_vinfo->shared->check_datarefs ();
3213 bb_vinfo->vector_mode = next_vector_mode;
3214
3215 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3216 && dbg_cnt (vect_slp))
3217 {
3218 if (dump_enabled_p ())
3219 {
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "***** Analysis succeeded with vector mode"
3222 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3223 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3224 }
3225
3226 bb_vinfo->shared->check_datarefs ();
3227 vect_schedule_slp (bb_vinfo);
3228
3229 unsigned HOST_WIDE_INT bytes;
3230 if (dump_enabled_p ())
3231 {
3232 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3233 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3234 "basic block part vectorized using %wu byte "
3235 "vectors\n", bytes);
3236 else
3237 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3238 "basic block part vectorized using variable "
3239 "length vectors\n");
3240 }
3241
3242 vectorized = true;
3243 }
3244 else
3245 {
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE, vect_location,
3248 "***** Analysis failed with vector mode %s\n",
3249 GET_MODE_NAME (bb_vinfo->vector_mode));
3250 }
3251
3252 if (mode_i == 0)
3253 autodetected_vector_mode = bb_vinfo->vector_mode;
3254
3255 if (!fatal)
3256 while (mode_i < vector_modes.length ()
3257 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3258 {
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_NOTE, vect_location,
3261 "***** The result for vector mode %s would"
3262 " be the same\n",
3263 GET_MODE_NAME (vector_modes[mode_i]));
3264 mode_i += 1;
3265 }
3266
3267 delete bb_vinfo;
3268
3269 if (mode_i < vector_modes.length ()
3270 && VECTOR_MODE_P (autodetected_vector_mode)
3271 && (related_vector_mode (vector_modes[mode_i],
3272 GET_MODE_INNER (autodetected_vector_mode))
3273 == autodetected_vector_mode)
3274 && (related_vector_mode (autodetected_vector_mode,
3275 GET_MODE_INNER (vector_modes[mode_i]))
3276 == vector_modes[mode_i]))
3277 {
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_NOTE, vect_location,
3280 "***** Skipping vector mode %s, which would"
3281 " repeat the analysis for %s\n",
3282 GET_MODE_NAME (vector_modes[mode_i]),
3283 GET_MODE_NAME (autodetected_vector_mode));
3284 mode_i += 1;
3285 }
3286
3287 if (vectorized
3288 || mode_i == vector_modes.length ()
3289 || autodetected_vector_mode == VOIDmode
3290 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3291 vector sizes will fail do not bother iterating. */
3292 || fatal)
3293 return vectorized;
3294
3295 /* Try the next biggest vector size. */
3296 next_vector_mode = vector_modes[mode_i++];
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_NOTE, vect_location,
3299 "***** Re-trying analysis with vector mode %s\n",
3300 GET_MODE_NAME (next_vector_mode));
3301 }
3302 }
3303
3304 /* Main entry for the BB vectorizer. Analyze and transform BB, returning
3305 true if anything in the basic block was vectorized. */
3306
3307 bool
3308 vect_slp_bb (basic_block bb)
3309 {
3310 gimple_stmt_iterator gsi;
3311 bool any_vectorized = false;
3312
3313 gsi = gsi_start_bb (bb);
3314 while (!gsi_end_p (gsi))
3315 {
3316 gimple_stmt_iterator region_begin = gsi;
3317 vec<data_reference_p> datarefs = vNULL;
3318 int insns = 0;
3319
3320 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3321 {
3322 gimple *stmt = gsi_stmt (gsi);
3323 if (is_gimple_debug (stmt))
3324 continue;
3325 insns++;
3326
3327 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3328 vect_location = stmt;
3329
3330 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3331 break;
3332 }
3333
3334 /* Skip leading unhandled stmts. */
3335 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3336 {
3337 gsi_next (&gsi);
3338 continue;
3339 }
3340
3341 gimple_stmt_iterator region_end = gsi;
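      /* The region to vectorize is [region_begin, region_end); GSI now
         points at the first stmt that could not be handled, if any.  */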
3342
3343 if (insns > param_slp_max_insns_in_bb)
3344 {
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3347 "not vectorized: too many instructions in "
3348 "basic block.\n");
3349 }
3350 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3351 any_vectorized = true;
3352
3353 if (gsi_end_p (region_end))
3354 break;
3355
3356 /* Skip the unhandled stmt. */
3357 gsi_next (&gsi);
3358 }
3359
3360 return any_vectorized;
3361 }
3362
3363
3364 /* Return true if constant operand OP_NUM of STMT_VINFO requires a boolean vector type. */
3365
3366 static bool
3367 vect_mask_constant_operand_p (vec_info *vinfo,
3368 stmt_vec_info stmt_vinfo, unsigned op_num)
3369 {
3370 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3371 tree op, vectype;
3372 enum vect_def_type dt;
3373
3374 /* For comparison and COND_EXPR type is chosen depending
3375 on the non-constant other comparison operand. */
3376 if (TREE_CODE_CLASS (code) == tcc_comparison)
3377 {
3378 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3379 op = gimple_assign_rhs1 (stmt);
3380
3381 if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
3382 gcc_unreachable ();
3383
3384 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3385 }
3386
3387 if (code == COND_EXPR)
3388 {
3389 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3390 tree cond = gimple_assign_rhs1 (stmt);
3391
3392 if (TREE_CODE (cond) == SSA_NAME)
3393 {
3394 if (op_num > 0)
3395 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3396 op = cond;
3397 }
3398 else
3399 {
3400 if (op_num > 1)
3401 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3402 op = TREE_OPERAND (cond, 0);
3403 }
3404
3405 if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
3406 gcc_unreachable ();
3407
3408 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3409 }
3410
3411 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3412 }
3413
3414 /* Build a variable-length vector in which the elements in ELTS are repeated
3415 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3416 RESULTS and add any new instructions to SEQ.
3417
3418 The approach we use is:
3419
3420 (1) Find a vector mode VM with integer elements of mode IM.
3421
3422 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3423 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3424 from small vectors to IM.
3425
3426 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3427
3428 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3429 correct byte contents.
3430
3431 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3432
3433 We try to find the largest IM for which this sequence works, in order
3434 to cut down on the number of interleaves. */
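  /* For example, with ELTS = {a, b, c, d} and NVECTORS = 2 the partial
     vectors {a, b} and {c, d} are each viewed as one wider integer,
     broadcast across a vector, and a single interleaving step then
     yields the byte pattern a b c d a b c d ..., which step (5) casts
     back to the required vector type.  */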
3435
3436 void
3437 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3438 vec<tree> elts, unsigned int nresults,
3439 vec<tree> &results)
3440 {
3441 unsigned int nelts = elts.length ();
3442 tree element_type = TREE_TYPE (vector_type);
3443
3444 /* (1) Find a vector mode VM with integer elements of mode IM. */
3445 unsigned int nvectors = 1;
3446 tree new_vector_type;
3447 tree permutes[2];
3448 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3449 &nvectors, &new_vector_type,
3450 permutes))
3451 gcc_unreachable ();
3452
3453 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3454 unsigned int partial_nelts = nelts / nvectors;
3455 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3456
3457 tree_vector_builder partial_elts;
3458 auto_vec<tree, 32> pieces (nvectors * 2);
3459 pieces.quick_grow (nvectors * 2);
3460 for (unsigned int i = 0; i < nvectors; ++i)
3461 {
3462 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3463 ELTS' has mode IM. */
3464 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3465 for (unsigned int j = 0; j < partial_nelts; ++j)
3466 partial_elts.quick_push (elts[i * partial_nelts + j]);
3467 tree t = gimple_build_vector (seq, &partial_elts);
3468 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3469 TREE_TYPE (new_vector_type), t);
3470
3471 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3472 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3473 }
3474
3475 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3476 correct byte contents.
3477
3478 We need to repeat the following operation log2(nvectors) times:
3479
3480 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3481 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3482
3483 However, if each input repeats every N elements and the VF is
3484 a multiple of N * 2, the HI result is the same as the LO. */
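/* For instance (an illustration added in editing): with splatted inputs,
   both the LO and the HI interleave of {a,a,a,a} and {b,b,b,b} are
   {a,b,a,b}, so an odd-numbered output can simply reuse the preceding
   even-numbered one. */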
3485 unsigned int in_start = 0;
3486 unsigned int out_start = nvectors;
3487 unsigned int hi_start = nvectors / 2;
3488 /* A bound on the number of outputs needed to produce NRESULTS results
3489 in the final iteration. */
3490 unsigned int noutputs_bound = nvectors * nresults;
3491 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3492 {
3493 noutputs_bound /= 2;
3494 unsigned int limit = MIN (noutputs_bound, nvectors);
3495 for (unsigned int i = 0; i < limit; ++i)
3496 {
3497 if ((i & 1) != 0
3498 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3499 2 * in_repeat))
3500 {
3501 pieces[out_start + i] = pieces[out_start + i - 1];
3502 continue;
3503 }
3504
3505 tree output = make_ssa_name (new_vector_type);
3506 tree input1 = pieces[in_start + (i / 2)];
3507 tree input2 = pieces[in_start + (i / 2) + hi_start];
3508 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3509 input1, input2,
3510 permutes[i & 1]);
3511 gimple_seq_add_stmt (seq, stmt);
3512 pieces[out_start + i] = output;
3513 }
3514 std::swap (in_start, out_start);
3515 }
3516
3517 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3518 results.reserve (nresults);
3519 for (unsigned int i = 0; i < nresults; ++i)
3520 if (i < nvectors)
3521 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3522 pieces[in_start + i]));
3523 else
3524 results.quick_push (results[i - nvectors]);
3525 }
3526
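/* A minimal standalone sketch (added in editing; it is not part of GCC and
   every name below is illustrative): steps (3) and (4) above are simulated
   on plain integer "vectors" to show how NVECTORS splatted pieces become
   vectors holding the repeating pattern ELTS'[0], ..., ELTS'[NVECTORS-1].
   The bookkeeping of the real code (NOUTPUTS_BOUND, the HI == LO shortcut)
   is omitted. */

#include <cstdio>
#include <utility>
#include <vector>

typedef std::vector<int> ivec;

/* Interleave the low or the high halves of A and B, mirroring the two
   permute masks computed by can_duplicate_and_interleave_p. */
static ivec
interleave (const ivec &a, const ivec &b, bool high)
{
  size_t n = a.size (), base = high ? n / 2 : 0;
  ivec out (n);
  for (size_t i = 0; i < n / 2; ++i)
    {
      out[2 * i] = a[base + i];
      out[2 * i + 1] = b[base + i];
    }
  return out;
}

int
main ()
{
  const unsigned nvectors = 4, vm_nelts = 8;
  std::vector<ivec> pieces (2 * nvectors);

  /* Step (3): pieces[i] is element I duplicated across a whole vector. */
  for (unsigned i = 0; i < nvectors; ++i)
    pieces[i] = ivec (vm_nelts, i);

  /* Step (4): log2 (nvectors) rounds of LO/HI interleaves. */
  unsigned in_start = 0, out_start = nvectors;
  for (unsigned in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      for (unsigned i = 0; i < nvectors; ++i)
        pieces[out_start + i]
          = interleave (pieces[in_start + i / 2],
                        pieces[in_start + i / 2 + nvectors / 2],
                        (i & 1) != 0);
      std::swap (in_start, out_start);
    }

  /* Each result now reads 0 1 2 3 0 1 2 3: ELTS repeated to fill it. */
  for (unsigned i = 0; i < nvectors; ++i)
    {
      for (int v : pieces[in_start + i])
        printf ("%d ", v);
      printf ("\n");
    }
  return 0;
}
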
3527
3528 /* For constant and loop invariant defs of SLP_NODE this function returns
3529 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3530 OP_NODE determines the node for the operand containing the scalar
3531 operands. */
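/* For example (an illustration added in editing, not from the sources):
   for an operand holding the loop-invariant constants { 2, 3, 2, 3 } with
   a V4SI vector type, a single vector constant { 2, 3, 2, 3 } is pushed
   onto VEC_OPRNDS. */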
3532
3533 static void
3534 vect_get_constant_vectors (vec_info *vinfo,
3535 slp_tree slp_node, unsigned op_num,
3536 vec<tree> *vec_oprnds)
3537 {
3538 slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3539 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3540 unsigned HOST_WIDE_INT nunits;
3541 tree vec_cst;
3542 unsigned j, number_of_places_left_in_vector;
3543 tree vector_type;
3544 tree vop;
3545 int group_size = op_node->ops.length ();
3546 unsigned int vec_num, i;
3547 unsigned number_of_copies = 1;
3548 bool constant_p;
3549 tree neutral_op = NULL;
3550 gimple_seq ctor_seq = NULL;
3551 auto_vec<tree, 16> permute_results;
3552
3553 /* ??? SLP analysis should compute the vector type for the
3554 constant / invariant and store it in the SLP node. */
3555 tree op = op_node->ops[0];
3556 /* Check if vector type is a boolean vector. */
3557 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3558 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3559 && vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num))
3560 vector_type = truth_type_for (stmt_vectype);
3561 else
3562 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3563
3564 /* ??? For lane-reducing ops we should also have the required number
3565 of vector stmts initialized rather than second-guessing here. */
3566 unsigned int number_of_vectors;
3567 if (is_gimple_assign (stmt_vinfo->stmt)
3568 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3569 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3570 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3571 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3572 else
3573 number_of_vectors
3574 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3575 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3576 vector_type);
3577 vec_oprnds->create (number_of_vectors);
3578 auto_vec<tree> voprnds (number_of_vectors);
3579
3580 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3581 created vectors. It is greater than 1 if unrolling is performed.
3582
3583 For example, we have two scalar operands, s1 and s2 (e.g., group of
3584 strided accesses of size two), while NUNITS is four (i.e., four scalars
3585 of this type can be packed in a vector). The output vector will contain
3586 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3587 will be 2).
3588
3589 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3590 containing the operands.
3591
3592 For example, NUNITS is four as before, and the group size is 8
3593 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3594 {s5, s6, s7, s8}. */
3595
3596 /* When using duplicate_and_interleave, we just need one element for
3597 each scalar statement. */
3598 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3599 nunits = group_size;
3600
3601 number_of_copies = nunits * number_of_vectors / group_size;
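/* For instance, with the examples above: NUNITS = 4, GROUP_SIZE = 2 and
   NUMBER_OF_VECTORS = 1 give NUMBER_OF_COPIES = 2 ({s1, s2, s1, s2}),
   while NUNITS = 4, GROUP_SIZE = 8 and NUMBER_OF_VECTORS = 2 give a
   single copy spread over two vectors. */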
3602
3603 number_of_places_left_in_vector = nunits;
3604 constant_p = true;
3605 tree_vector_builder elts (vector_type, nunits, 1);
3606 elts.quick_grow (nunits);
3607 bool place_after_defs = false;
3608 for (j = 0; j < number_of_copies; j++)
3609 {
3610 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3611 {
3612 /* Create 'vect_ = {op0,op1,...,opn}'. */
3613 number_of_places_left_in_vector--;
3614 tree orig_op = op;
3615 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3616 {
3617 if (CONSTANT_CLASS_P (op))
3618 {
3619 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3620 {
3621 /* Can't use VIEW_CONVERT_EXPR for booleans because
3622 of possibly different sizes of scalar value and
3623 vector element. */
3624 if (integer_zerop (op))
3625 op = build_int_cst (TREE_TYPE (vector_type), 0);
3626 else if (integer_onep (op))
3627 op = build_all_ones_cst (TREE_TYPE (vector_type));
3628 else
3629 gcc_unreachable ();
3630 }
3631 else
3632 op = fold_unary (VIEW_CONVERT_EXPR,
3633 TREE_TYPE (vector_type), op);
3634 gcc_assert (op && CONSTANT_CLASS_P (op));
3635 }
3636 else
3637 {
3638 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3639 gimple *init_stmt;
3640 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3641 {
3642 tree true_val
3643 = build_all_ones_cst (TREE_TYPE (vector_type));
3644 tree false_val
3645 = build_zero_cst (TREE_TYPE (vector_type));
3646 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3647 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3648 op, true_val,
3649 false_val);
3650 }
3651 else
3652 {
3653 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3654 op);
3655 init_stmt
3656 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3657 op);
3658 }
3659 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3660 op = new_temp;
3661 }
3662 }
3663 elts[number_of_places_left_in_vector] = op;
3664 if (!CONSTANT_CLASS_P (op))
3665 constant_p = false;
3666 if (TREE_CODE (orig_op) == SSA_NAME
3667 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3668 && is_a <bb_vec_info> (vinfo)
3669 && (as_a <bb_vec_info> (vinfo)->bb
3670 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3671 place_after_defs = true;
3672
3673 if (number_of_places_left_in_vector == 0)
3674 {
3675 if (constant_p
3676 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3677 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3678 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3679 else
3680 {
3681 if (permute_results.is_empty ())
3682 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3683 elts, number_of_vectors,
3684 permute_results);
3685 vec_cst = permute_results[number_of_vectors - j - 1];
3686 }
3687 tree init;
3688 gimple_stmt_iterator gsi;
3689 if (place_after_defs)
3690 {
3691 stmt_vec_info last_stmt_info
3692 = vect_find_last_scalar_stmt_in_slp (slp_node);
3693 gsi = gsi_for_stmt (last_stmt_info->stmt);
3694 init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
3695 vector_type, &gsi);
3696 }
3697 else
3698 init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
3699 vector_type, NULL);
3700 if (ctor_seq != NULL)
3701 {
3702 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3703 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3704 ctor_seq = NULL;
3705 }
3706 voprnds.quick_push (init);
3707 place_after_defs = false;
3708 number_of_places_left_in_vector = nunits;
3709 constant_p = true;
3710 elts.new_vector (vector_type, nunits, 1);
3711 elts.quick_grow (nunits);
3712 }
3713 }
3714 }
3715
3716 /* Since the vectors are created in the reverse order, push them in
3717 reverse here to restore the original order. */
3718 vec_num = voprnds.length ();
3719 for (j = vec_num; j != 0; j--)
3720 {
3721 vop = voprnds[j - 1];
3722 vec_oprnds->quick_push (vop);
3723 }
3724
3725 /* If VF is greater than the unrolling factor needed for the SLP group
3726 of stmts, the NUMBER_OF_VECTORS to be created is greater than
3727 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3728 to replicate the vectors. */
3729 while (number_of_vectors > vec_oprnds->length ())
3730 {
3731 tree neutral_vec = NULL;
3732
3733 if (neutral_op)
3734 {
3735 if (!neutral_vec)
3736 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3737
3738 vec_oprnds->quick_push (neutral_vec);
3739 }
3740 else
3741 {
3742 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3743 vec_oprnds->quick_push (vop);
3744 }
3745 }
3746 }
3747
3748
3749 /* Get the vectorized definitions from SLP_NODE, which contains the
3750 corresponding vectorized def-stmts. */
3751
3752 static void
3753 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3754 {
3755 stmt_vec_info vec_def_stmt_info;
3756 unsigned int i;
3757
3758 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3759
3760 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3761 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3762 }
3763
3764
3765 /* Get N vectorized definitions for SLP_NODE.
3766 If the scalar definitions are loop invariants or constants, collect them and
3767 call vect_get_constant_vectors() to create vector stmts.
3768 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3769 must be stored in the corresponding child of SLP_NODE, and we call
3770 vect_get_slp_vect_defs () to retrieve them. */
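/* For example (illustrative only): for a node whose first child is an
   internal def and whose second child holds the constant splat
   { 1, 1, 1, 1 }, the first vec_defs vector is filled from the child's
   vec stmts and the second by vect_get_constant_vectors. */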
3771
3772 void
3773 vect_get_slp_defs (vec_info *vinfo,
3774 slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3775 {
3776 if (n == -1U)
3777 n = SLP_TREE_CHILDREN (slp_node).length ();
3778
3779 for (unsigned i = 0; i < n; ++i)
3780 {
3781 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3782
3783 vec<tree> vec_defs = vNULL;
3784
3785 /* For each operand we check whether it has vectorized definitions in a
3786 child node or whether we need to create them (for invariants and constants). */
3787 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3788 {
3789 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3790 vect_get_slp_vect_defs (child, &vec_defs);
3791 }
3792 else
3793 vect_get_constant_vectors (vinfo, slp_node, i, &vec_defs);
3794
3795 vec_oprnds->quick_push (vec_defs);
3796 }
3797 }
3798
3799 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3800 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3801 permute statements for the SLP node NODE. */
3802
3803 bool
3804 vect_transform_slp_perm_load (vec_info *vinfo,
3805 slp_tree node, vec<tree> dr_chain,
3806 gimple_stmt_iterator *gsi, poly_uint64 vf,
3807 bool analyze_only, unsigned *n_perms)
3808 {
3809 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3810 int vec_index = 0;
3811 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3812 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
3813 unsigned int mask_element;
3814 machine_mode mode;
3815
3816 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3817 return false;
3818
3819 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3820
3821 mode = TYPE_MODE (vectype);
3822 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3823
3824 /* Initialize the vect stmts of NODE to properly insert the generated
3825 stmts later. */
3826 if (! analyze_only)
3827 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3828 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3829 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3830
3831 /* Generate permutation masks for every NODE. The number of masks for each
3832 NODE is equal to GROUP_SIZE.
3833 E.g., we have a group of three nodes with three loads from the same
3834 location in each node, and the vector size is 4. I.e., we have an
3835 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3836 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3837 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3838 ...
3839
3840 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3841 The last mask is illegal since we assume two operands for the permute
3842 operation, and the mask element values can't be outside that range.
3843 Hence, the last mask must be converted into {2,5,5,5}.
3844 For the first two permutations we need the first and the second input
3845 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3846 we need the second and the third vectors: {b1,c1,a2,b2} and
3847 {c2,a3,b3,c3}. */
3848
3849 int vect_stmts_counter = 0;
3850 unsigned int index = 0;
3851 int first_vec_index = -1;
3852 int second_vec_index = -1;
3853 bool noop_p = true;
3854 *n_perms = 0;
3855
3856 vec_perm_builder mask;
3857 unsigned int nelts_to_build;
3858 unsigned int nvectors_per_build;
3859 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3860 && multiple_p (nunits, group_size));
3861 if (repeating_p)
3862 {
3863 /* A single vector contains a whole number of copies of the node, so:
3864 (a) all permutes can use the same mask; and
3865 (b) the permutes only need a single vector input. */
3866 mask.new_vector (nunits, group_size, 3);
3867 nelts_to_build = mask.encoded_nelts ();
3868 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3869 }
3870 else
3871 {
3872 /* We need to construct a separate mask for each vector statement. */
3873 unsigned HOST_WIDE_INT const_nunits, const_vf;
3874 if (!nunits.is_constant (&const_nunits)
3875 || !vf.is_constant (&const_vf))
3876 return false;
3877 mask.new_vector (const_nunits, const_nunits, 1);
3878 nelts_to_build = const_vf * group_size;
3879 nvectors_per_build = 1;
3880 }
3881
3882 unsigned int count = mask.encoded_nelts ();
3883 mask.quick_grow (count);
3884 vec_perm_indices indices;
3885
3886 for (unsigned int j = 0; j < nelts_to_build; j++)
3887 {
3888 unsigned int iter_num = j / group_size;
3889 unsigned int stmt_num = j % group_size;
3890 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3891 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3892 if (repeating_p)
3893 {
3894 first_vec_index = 0;
3895 mask_element = i;
3896 }
3897 else
3898 {
3899 /* Enforced before the loop when !repeating_p. */
3900 unsigned int const_nunits = nunits.to_constant ();
3901 vec_index = i / const_nunits;
3902 mask_element = i % const_nunits;
3903 if (vec_index == first_vec_index
3904 || first_vec_index == -1)
3905 {
3906 first_vec_index = vec_index;
3907 }
3908 else if (vec_index == second_vec_index
3909 || second_vec_index == -1)
3910 {
3911 second_vec_index = vec_index;
3912 mask_element += const_nunits;
3913 }
3914 else
3915 {
3916 if (dump_enabled_p ())
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3918 "permutation requires at "
3919 "least three vectors %G",
3920 stmt_info->stmt);
3921 gcc_assert (analyze_only);
3922 return false;
3923 }
3924
3925 gcc_assert (mask_element < 2 * const_nunits);
3926 }
3927
3928 if (mask_element != index)
3929 noop_p = false;
3930 mask[index++] = mask_element;
3931
3932 if (index == count && !noop_p)
3933 {
3934 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3935 if (!can_vec_perm_const_p (mode, indices))
3936 {
3937 if (dump_enabled_p ())
3938 {
3939 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3940 vect_location,
3941 "unsupported vect permute { ");
3942 for (i = 0; i < count; ++i)
3943 {
3944 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3945 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3946 }
3947 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3948 }
3949 gcc_assert (analyze_only);
3950 return false;
3951 }
3952
3953 ++*n_perms;
3954 }
3955
3956 if (index == count)
3957 {
3958 if (!analyze_only)
3959 {
3960 tree mask_vec = NULL_TREE;
3961
3962 if (! noop_p)
3963 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3964
3965 if (second_vec_index == -1)
3966 second_vec_index = first_vec_index;
3967
3968 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3969 {
3970 /* Generate the permute statement if necessary. */
3971 tree first_vec = dr_chain[first_vec_index + ri];
3972 tree second_vec = dr_chain[second_vec_index + ri];
3973 stmt_vec_info perm_stmt_info;
3974 if (! noop_p)
3975 {
3976 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3977 tree perm_dest
3978 = vect_create_destination_var (gimple_assign_lhs (stmt),
3979 vectype);
3980 perm_dest = make_ssa_name (perm_dest);
3981 gassign *perm_stmt
3982 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3983 first_vec, second_vec,
3984 mask_vec);
3985 perm_stmt_info
3986 = vect_finish_stmt_generation (vinfo,
3987 stmt_info, perm_stmt,
3988 gsi);
3989 }
3990 else
3991 /* If mask was NULL_TREE generate the requested
3992 identity transform. */
3993 perm_stmt_info = vinfo->lookup_def (first_vec);
3994
3995 /* Store the vector statement in NODE. */
3996 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3997 = perm_stmt_info;
3998 }
3999 }
4000
4001 index = 0;
4002 first_vec_index = -1;
4003 second_vec_index = -1;
4004 noop_p = true;
4005 }
4006 }
4007
4008 return true;
4009 }
4010
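/* A minimal standalone sketch (added in editing; not part of GCC):
   reproduce the mask arithmetic from the worked example in the comment
   inside vect_transform_slp_perm_load above for the a's, assuming
   DR_GROUP_SIZE = 3, a load permutation of {0,0,0}, NUNITS = 4 and
   VF = 4. It prints {0,0,0,3}, {3,3,6,6} and the converted third
   mask {2,5,5,5}. */

#include <cstdio>

int
main ()
{
  const unsigned group_size = 3, dr_group_size = 3, nunits = 4, vf = 4;
  const unsigned load_perm[3] = { 0, 0, 0 };

  int first_vec = -1, second_vec = -1;
  unsigned mask[4], index = 0;
  for (unsigned j = 0; j < vf * group_size; ++j)
    {
      unsigned i = (j / group_size) * dr_group_size
                   + load_perm[j % group_size];
      int vec_index = i / nunits;
      unsigned elt = i % nunits;
      if (first_vec == -1 || vec_index == first_vec)
        first_vec = vec_index;
      else if (second_vec == -1 || vec_index == second_vec)
        {
          /* Elements of the second input vector are offset by NUNITS;
             a third distinct vector would make the permutation
             unsupported. */
          second_vec = vec_index;
          elt += nunits;
        }
      mask[index++] = elt;
      if (index == nunits)
        {
          printf ("{%u,%u,%u,%u} from vectors %d and %d\n",
                  mask[0], mask[1], mask[2], mask[3],
                  first_vec, second_vec == -1 ? first_vec : second_vec);
          index = 0;
          first_vec = second_vec = -1;
        }
    }
  return 0;
}
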
4011 /* Vectorize SLP instance tree in postorder. */
4012
4013 static void
4014 vect_schedule_slp_instance (vec_info *vinfo,
4015 slp_tree node, slp_instance instance)
4016 {
4017 gimple_stmt_iterator si;
4018 stmt_vec_info stmt_info;
4019 unsigned int group_size;
4020 tree vectype;
4021 int i, j;
4022 slp_tree child;
4023
4024 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4025 return;
4026
4027 /* See if we have already vectorized the node in the graph of the
4028 SLP instance. */
4029 if (SLP_TREE_VEC_STMTS (node).exists ())
4030 return;
4031
4032 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4033 vect_schedule_slp_instance (vinfo, child, instance);
4034
4035 /* Push SLP node def-type to stmts. */
4036 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4037 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4038 {
4039 stmt_vec_info child_stmt_info;
4040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4041 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4042 }
4043
4044 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4045
4046 /* VECTYPE is the type of the destination. */
4047 vectype = STMT_VINFO_VECTYPE (stmt_info);
4048 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4049 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
4050
4051 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4052 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4053
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_NOTE, vect_location,
4056 "------>vectorizing SLP node starting from: %G",
4057 stmt_info->stmt);
4058
4059 /* Vectorized stmts go before the last scalar stmt, which is where
4060 all uses are ready. */
4061 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4062 si = gsi_for_stmt (last_stmt_info->stmt);
4063
4064 /* Handle two-operation SLP nodes by vectorizing the group with
4065 both operations and then performing a merge. */
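/* E.g. (an illustration added in editing): for the scalar group
   { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } the node is vectorized once
   with PLUS_EXPR and once with MINUS_EXPR, and the two vector results
   are blended with a VEC_PERM_EXPR mask like { 0, 5, 2, 7 }, taking
   even lanes from the first result and odd lanes from the second. */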
4066 bool done_p = false;
4067 if (SLP_TREE_TWO_OPERATORS (node))
4068 {
4069 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4070 enum tree_code code0 = gimple_assign_rhs_code (stmt);
4071 enum tree_code ocode = ERROR_MARK;
4072 stmt_vec_info ostmt_info;
4073 vec_perm_builder mask (group_size, group_size, 1);
4074 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4075 {
4076 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4077 if (gimple_assign_rhs_code (ostmt) != code0)
4078 {
4079 mask.quick_push (1);
4080 ocode = gimple_assign_rhs_code (ostmt);
4081 }
4082 else
4083 mask.quick_push (0);
4084 }
4085 if (ocode != ERROR_MARK)
4086 {
4087 vec<stmt_vec_info> v0;
4088 vec<stmt_vec_info> v1;
4089 unsigned j;
4090 tree tmask = NULL_TREE;
4091 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4092 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4093 SLP_TREE_VEC_STMTS (node).truncate (0);
4094 gimple_assign_set_rhs_code (stmt, ocode);
4095 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4096 gimple_assign_set_rhs_code (stmt, code0);
4097 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4098 SLP_TREE_VEC_STMTS (node).truncate (0);
4099 tree meltype = build_nonstandard_integer_type
4100 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4101 tree mvectype = get_same_sized_vectype (meltype, vectype);
4102 unsigned k = 0, l;
4103 for (j = 0; j < v0.length (); ++j)
4104 {
4105 /* Enforced by vect_build_slp_tree, which rejects variable-length
4106 vectors for SLP_TREE_TWO_OPERATORS. */
4107 unsigned int const_nunits = nunits.to_constant ();
4108 tree_vector_builder melts (mvectype, const_nunits, 1);
4109 for (l = 0; l < const_nunits; ++l)
4110 {
4111 if (k >= group_size)
4112 k = 0;
4113 tree t = build_int_cst (meltype,
4114 mask[k++] * const_nunits + l);
4115 melts.quick_push (t);
4116 }
4117 tmask = melts.build ();
4118
4119 /* ??? Not all targets support a VEC_PERM_EXPR with a
4120 constant mask that would translate to a vec_merge RTX
4121 (with their vec_perm_const_ok). We can either not
4122 vectorize in that case or let veclower do its job.
4123 Unfortunately that isn't too great and at least for
4124 plus/minus we'd eventually like to match targets'
4125 vector addsub instructions. */
4126 gimple *vstmt;
4127 vstmt = gimple_build_assign (make_ssa_name (vectype),
4128 VEC_PERM_EXPR,
4129 gimple_assign_lhs (v0[j]->stmt),
4130 gimple_assign_lhs (v1[j]->stmt),
4131 tmask);
4132 SLP_TREE_VEC_STMTS (node).quick_push
4133 (vect_finish_stmt_generation (vinfo, stmt_info, vstmt, &si));
4134 }
4135 v0.release ();
4136 v1.release ();
4137 done_p = true;
4138 }
4139 }
4140 if (!done_p)
4141 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4142
4143 /* Restore stmt def-types. */
4144 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4145 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4146 {
4147 stmt_vec_info child_stmt_info;
4148 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4149 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4150 }
4151 }
4152
4153 /* Replace the scalar calls in SLP node NODE with assignments of zero to
4154 their lhs. For loop vectorization this is done in vectorizable_call, but
4155 for SLP it needs to be deferred until the end of vect_schedule_slp,
4156 because multiple SLP instances may refer to the same scalar stmt. */
4157
4158 static void
4159 vect_remove_slp_scalar_calls (vec_info *vinfo,
4160 slp_tree node, hash_set<slp_tree> &visited)
4161 {
4162 gimple *new_stmt;
4163 gimple_stmt_iterator gsi;
4164 int i;
4165 slp_tree child;
4166 tree lhs;
4167 stmt_vec_info stmt_info;
4168
4169 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4170 return;
4171
4172 if (visited.add (node))
4173 return;
4174
4175 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4176 vect_remove_slp_scalar_calls (vinfo, child, visited);
4177
4178 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4179 {
4180 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4181 if (!stmt || gimple_bb (stmt) == NULL)
4182 continue;
4183 if (is_pattern_stmt_p (stmt_info)
4184 || !PURE_SLP_STMT (stmt_info))
4185 continue;
4186 lhs = gimple_call_lhs (stmt);
4187 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4188 gsi = gsi_for_stmt (stmt);
4189 vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4190 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4191 }
4192 }
4193
4194 static void
4195 vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
4196 {
4197 hash_set<slp_tree> visited;
4198 vect_remove_slp_scalar_calls (vinfo, node, visited);
4199 }
4200
4201 /* Vectorize the instance root. */
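/* For example (illustrative only): if the root is the CONSTRUCTOR
   `x = { a, b, c, d }' and the node produced exactly one vector stmt,
   its lhs is assigned to `x' directly (through a VIEW_CONVERT_EXPR when
   the types differ); with several vector stmts, their lhs values are
   collected into a new CONSTRUCTOR for `x'. */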
4202
4203 void
4204 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4205 {
4206 gassign *rstmt = NULL;
4207
4208 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4209 {
4210 stmt_vec_info child_stmt_info;
4211 int j;
4212
4213 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4214 {
4215 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4216 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4217 if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4218 TREE_TYPE (vect_lhs)))
4219 vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4220 vect_lhs);
4221 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4222 break;
4223 }
4224 }
4225 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4226 {
4227 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4228 stmt_vec_info child_stmt_info;
4229 int j;
4230 vec<constructor_elt, va_gc> *v;
4231 vec_alloc (v, nelts);
4232
4233 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4234 {
4235 CONSTRUCTOR_APPEND_ELT (v,
4236 NULL_TREE,
4237 gimple_get_lhs (child_stmt_info->stmt));
4238 }
4239 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4240 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4241 tree r_constructor = build_constructor (rtype, v);
4242 rstmt = gimple_build_assign (lhs, r_constructor);
4243 }
4244
4245 gcc_assert (rstmt);
4246
4247 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4248 gsi_replace (&rgsi, rstmt, true);
4249 }
4250
4251 /* Generate vector code for all SLP instances in the loop/basic block. */
4252
4253 void
4254 vect_schedule_slp (vec_info *vinfo)
4255 {
4256 vec<slp_instance> slp_instances;
4257 slp_instance instance;
4258 unsigned int i;
4259
4260 slp_instances = vinfo->slp_instances;
4261 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4262 {
4263 slp_tree node = SLP_INSTANCE_TREE (instance);
4264 /* Schedule the tree of INSTANCE. */
4265 vect_schedule_slp_instance (vinfo, node, instance);
4266
4267 if (SLP_INSTANCE_ROOT_STMT (instance))
4268 vectorize_slp_instance_root_stmt (node, instance);
4269
4270 if (dump_enabled_p ())
4271 dump_printf_loc (MSG_NOTE, vect_location,
4272 "vectorizing stmts using SLP.\n");
4273 }
4274
4275 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4276 {
4277 slp_tree root = SLP_INSTANCE_TREE (instance);
4278 stmt_vec_info store_info;
4279 unsigned int j;
4280
4281 /* Remove scalar call stmts. Do not do this for basic-block
4282 vectorization as not all uses may be vectorized.
4283 ??? Why should this be necessary? DCE should be able to
4284 remove the stmts itself.
4285 ??? For BB vectorization we can as well remove scalar
4286 stmts starting from the SLP tree root if they have no
4287 uses. */
4288 if (is_a <loop_vec_info> (vinfo))
4289 vect_remove_slp_scalar_calls (vinfo, root);
4290
4291 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
4292 {
4293 if (!STMT_VINFO_DATA_REF (store_info))
4294 break;
4295
4296 if (SLP_INSTANCE_ROOT_STMT (instance))
4297 continue;
4298
4299 store_info = vect_orig_stmt (store_info);
4300 /* Free the attached stmt_vec_info and remove the stmt. */
4301 vinfo->remove_stmt (store_info);
4302 }
4303 }
4304 }