1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
42 #include "dbgcnt.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47
48
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
52
53 static void
54 vect_free_slp_tree (slp_tree node, bool final_p)
55 {
56 int i;
57 slp_tree child;
58
59 if (--node->refcnt != 0)
60 return;
61
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
63 vect_free_slp_tree (child, final_p);
64
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
69 if (!final_p)
70 {
71 stmt_vec_info stmt_info;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
73 {
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
76 }
77 }
78
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
81 SLP_TREE_SCALAR_OPS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
83 SLP_TREE_LOAD_PERMUTATION (node).release ();
84
85 free (node);
86 }
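
/* Editor's note: a minimal standalone sketch (not part of GCC) of the
   refcounting scheme used above, assuming a toy node type with an
   array of children and <stdlib.h> for free:

     struct toy_node { int refcnt; int nchildren; struct toy_node **child; };

     static void
     toy_node_unref (struct toy_node *n)
     {
       if (--n->refcnt != 0)
         return;
       for (int i = 0; i < n->nchildren; ++i)
         toy_node_unref (n->child[i]);
       free (n->child);
       free (n);
     }

   As in vect_free_slp_tree, a subtree shared by several parents is only
   released when its last reference goes away.  */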
87
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
91
92 void
93 vect_free_slp_instance (slp_instance instance, bool final_p)
94 {
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
96 SLP_INSTANCE_LOADS (instance).release ();
97 free (instance);
98 }
99
100
101 /* Create an SLP node for SCALAR_STMTS. */
102
103 static slp_tree
104 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
105 {
106 slp_tree node;
107 stmt_vec_info stmt_info = scalar_stmts[0];
108 unsigned int nops;
109
110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
118 else if (is_a <gphi *> (stmt_info->stmt))
119 nops = 0;
120 else
121 return NULL;
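/* Editor's note: examples of the operand counts computed above: for a
   call "z = f (a, b)" NOPS is 2; for "z = x + y" gimple_num_ops is 3
   (lhs, rhs1, rhs2), so NOPS is 2; for "z = c ? x : y" one extra
   operand slot is reserved so that, when the condition is a
   comparison, both of its operands can be exposed as separate SLP
   children.  */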
122
123 node = XNEW (struct _slp_tree);
124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
125 SLP_TREE_SCALAR_OPS (node) = vNULL;
126 SLP_TREE_VEC_STMTS (node).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
128 SLP_TREE_CHILDREN (node).create (nops);
129 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
130 SLP_TREE_TWO_OPERATORS (node) = false;
131 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
132 node->refcnt = 1;
133 node->max_nunits = 1;
134
135 unsigned i;
136 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
137 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
138
139 return node;
140 }
141
142 /* Create an SLP node for OPS. */
143
144 static slp_tree
145 vect_create_new_slp_node (vec<tree> ops)
146 {
147 slp_tree node;
148
149 node = XNEW (struct _slp_tree);
150 SLP_TREE_SCALAR_STMTS (node) = vNULL;
151 SLP_TREE_SCALAR_OPS (node) = ops;
152 SLP_TREE_VEC_STMTS (node).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
154 SLP_TREE_CHILDREN (node) = vNULL;
155 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
156 SLP_TREE_TWO_OPERATORS (node) = false;
157 SLP_TREE_DEF_TYPE (node) = vect_external_def;
158 node->refcnt = 1;
159 node->max_nunits = 1;
160
161 return node;
162 }
163
164
165 /* This structure is used during the creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
167 node. */
168 typedef struct _slp_oprnd_info
169 {
170 /* Def-stmts for the operands. */
171 vec<stmt_vec_info> def_stmts;
172 /* Operands. */
173 vec<tree> ops;
174 /* Information about the first statement: its vector def-type, the type
175 of its operand, and an indication of whether any def is a pattern
176 stmt. */
177 tree first_op_type;
178 enum vect_def_type first_dt;
179 bool any_pattern;
180 } *slp_oprnd_info;
181
182
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
184 operand. */
185 static vec<slp_oprnd_info>
186 vect_create_oprnd_info (int nops, int group_size)
187 {
188 int i;
189 slp_oprnd_info oprnd_info;
190 vec<slp_oprnd_info> oprnds_info;
191
192 oprnds_info.create (nops);
193 for (i = 0; i < nops; i++)
194 {
195 oprnd_info = XNEW (struct _slp_oprnd_info);
196 oprnd_info->def_stmts.create (group_size);
197 oprnd_info->ops.create (group_size);
198 oprnd_info->first_dt = vect_uninitialized_def;
199 oprnd_info->first_op_type = NULL_TREE;
200 oprnd_info->any_pattern = false;
201 oprnds_info.quick_push (oprnd_info);
202 }
203
204 return oprnds_info;
205 }
206
207
208 /* Free operands info. */
209
210 static void
211 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
212 {
213 int i;
214 slp_oprnd_info oprnd_info;
215
216 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
217 {
218 oprnd_info->def_stmts.release ();
219 oprnd_info->ops.release ();
220 XDELETE (oprnd_info);
221 }
222
223 oprnds_info.release ();
224 }
225
226
227 /* Return true if STMTS contains a pattern statement. */
228
229 static bool
230 vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
231 {
232 stmt_vec_info stmt_info;
233 unsigned int i;
234 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
235 if (is_pattern_stmt_p (stmt_info))
236 return true;
237 return false;
238 }
239
240 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
241 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
242 of the chain. */
243
244 int
245 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
246 stmt_vec_info first_stmt_info)
247 {
248 stmt_vec_info next_stmt_info = first_stmt_info;
249 int result = 0;
250
251 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
252 return -1;
253
254 do
255 {
256 if (next_stmt_info == stmt_info)
257 return result;
258 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
259 if (next_stmt_info)
260 result += DR_GROUP_GAP (next_stmt_info);
261 }
262 while (next_stmt_info);
263
264 return -1;
265 }
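
/* Editor's note (a reading of the walk above): DR_GROUP_GAP of a
   non-first member is its distance in scalar elements from the
   previous member, so for the dense group a[0], a[1], a[2], a[3]
   every gap is 1 and the place of a[2] is 0 + 1 + 1 = 2.  For a group
   with a hole such as a[0], a[1], a[3], the gap of a[3] is 2 and its
   place is 0 + 1 + 2 = 3, i.e. its slot in the dense footprint of the
   group.  */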
266
267 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
268 using the method implemented by duplicate_and_interleave. Return true
269 if so, returning the number of intermediate vectors in *NVECTORS_OUT
270 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
271 (if nonnull). */
272
273 bool
274 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
275 tree elt_type, unsigned int *nvectors_out,
276 tree *vector_type_out,
277 tree *permutes)
278 {
279 tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
280 if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
281 return false;
282
283 machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
284 poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
285 unsigned int nvectors = 1;
286 for (;;)
287 {
288 scalar_int_mode int_mode;
289 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
290 if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
291 {
292 /* Get the natural vector type for this SLP group size. */
293 tree int_type = build_nonstandard_integer_type
294 (GET_MODE_BITSIZE (int_mode), 1);
295 tree vector_type
296 = get_vectype_for_scalar_type (vinfo, int_type, count);
297 if (vector_type
298 && VECTOR_MODE_P (TYPE_MODE (vector_type))
299 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
300 GET_MODE_SIZE (base_vector_mode)))
301 {
302 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
303 together into elements of type INT_TYPE and using the result
304 to build NVECTORS vectors. */
305 poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
306 vec_perm_builder sel1 (nelts, 2, 3);
307 vec_perm_builder sel2 (nelts, 2, 3);
308 poly_int64 half_nelts = exact_div (nelts, 2);
309 for (unsigned int i = 0; i < 3; ++i)
310 {
311 sel1.quick_push (i);
312 sel1.quick_push (i + nelts);
313 sel2.quick_push (half_nelts + i);
314 sel2.quick_push (half_nelts + i + nelts);
315 }
316 vec_perm_indices indices1 (sel1, 2, nelts);
317 vec_perm_indices indices2 (sel2, 2, nelts);
318 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
319 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
320 {
321 if (nvectors_out)
322 *nvectors_out = nvectors;
323 if (vector_type_out)
324 *vector_type_out = vector_type;
325 if (permutes)
326 {
327 permutes[0] = vect_gen_perm_mask_checked (vector_type,
328 indices1);
329 permutes[1] = vect_gen_perm_mask_checked (vector_type,
330 indices2);
331 }
332 return true;
333 }
334 }
335 }
336 if (!multiple_p (elt_bytes, 2, &elt_bytes))
337 return false;
338 nvectors *= 2;
339 }
340 }
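
/* Editor's note: for a fixed-width example with nelts == 4, the two
   selectors built above are the classic lo/hi interleaves of two
   input vectors A and B (indices >= nelts select from B):

     sel1 = { 0, 4, 1, 5 }   -> { a0, b0, a1, b1 }
     sel2 = { 2, 6, 3, 7 }   -> { a2, b2, a3, b3 }

   Encoding each with 2 interleaved patterns of 3 elements lets the
   same permutes be described for variable-length vectors as well.  */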
341
342 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
343 they are of a valid type and that they match the defs of the first stmt of
344 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
345 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
346 indicates swap is required for cond_expr stmts. Specifically, *SWAP
347 is 1 if STMT is cond and operands of comparison need to be swapped;
348 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
349 If there is any operand swap in this function, *SWAP is set to non-zero
350 value.
351 If there was a fatal error return -1; if the error could be corrected by
352 swapping operands of the parent node of this one, return 1; if everything is
353 ok return 0. */
354 static int
355 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
356 vec<stmt_vec_info> stmts, unsigned stmt_num,
357 vec<slp_oprnd_info> *oprnds_info)
358 {
359 stmt_vec_info stmt_info = stmts[stmt_num];
360 tree oprnd;
361 unsigned int i, number_of_oprnds;
362 enum vect_def_type dt = vect_uninitialized_def;
363 slp_oprnd_info oprnd_info;
364 int first_op_idx = 1;
365 unsigned int commutative_op = -1U;
366 bool first_op_cond = false;
367 bool first = stmt_num == 0;
368
369 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
370 {
371 number_of_oprnds = gimple_call_num_args (stmt);
372 first_op_idx = 3;
373 if (gimple_call_internal_p (stmt))
374 {
375 internal_fn ifn = gimple_call_internal_fn (stmt);
376 commutative_op = first_commutative_argument (ifn);
377
378 /* Masked load, only look at mask. */
379 if (ifn == IFN_MASK_LOAD)
380 {
381 number_of_oprnds = 1;
382 /* Mask operand index. */
383 first_op_idx = 5;
384 }
385 }
386 }
387 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
388 {
389 enum tree_code code = gimple_assign_rhs_code (stmt);
390 number_of_oprnds = gimple_num_ops (stmt) - 1;
391 /* Swap can only be done for cond_expr if asked to, otherwise we
392 could end up with a different comparison code from the first stmt. */
393 if (code == COND_EXPR
394 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
395 {
396 first_op_cond = true;
397 number_of_oprnds++;
398 }
399 else
400 commutative_op = commutative_tree_code (code) ? 0U : -1U;
401 }
402 else
403 return -1;
404
405 bool swapped = (*swap != 0);
406 gcc_assert (!swapped || first_op_cond);
407 for (i = 0; i < number_of_oprnds; i++)
408 {
409 again:
410 if (first_op_cond)
411 {
412 /* Map indicating how operands of cond_expr should be swapped. */
413 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
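/* Editor's note: maps[0] is the identity; maps[1] (*swap == 1) swaps
   the two comparison operands, e.g. a < b ? x : y matched as
   b > a ? x : y; maps[2] (*swap == 2) swaps the two arms of the
   COND_EXPR, used together with inverting the comparison code.  */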
414 int *map = maps[*swap];
415
416 if (i < 2)
417 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
418 first_op_idx), map[i]);
419 else
420 oprnd = gimple_op (stmt_info->stmt, map[i]);
421 }
422 else
423 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
424 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
425 oprnd = TREE_OPERAND (oprnd, 0);
426
427 oprnd_info = (*oprnds_info)[i];
428
429 stmt_vec_info def_stmt_info;
430 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
431 {
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
434 "Build SLP failed: can't analyze def for %T\n",
435 oprnd);
436
437 return -1;
438 }
439
440 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
441 oprnd_info->any_pattern = true;
442
443 if (first)
444 {
445 /* For the swapping logic below force vect_reduction_def
446 for the reduction op in a SLP reduction group. */
447 if (!STMT_VINFO_DATA_REF (stmt_info)
448 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
449 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
450 && def_stmt_info)
451 dt = vect_reduction_def;
452 oprnd_info->first_dt = dt;
453 oprnd_info->first_op_type = TREE_TYPE (oprnd);
454 }
455 else
456 {
457 /* Not first stmt of the group, check that the def-stmt/s match
458 the def-stmt/s of the first stmt. Allow different definition
459 types for reduction chains: the first stmt must be a
460 vect_reduction_def (a phi node), and the rest
461 must be part of the reduction chain. */
462 tree type = TREE_TYPE (oprnd);
463 if ((oprnd_info->first_dt != dt
464 && !(oprnd_info->first_dt == vect_reduction_def
465 && !STMT_VINFO_DATA_REF (stmt_info)
466 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
467 && def_stmt_info
468 && !STMT_VINFO_DATA_REF (def_stmt_info)
469 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
470 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
471 && !((oprnd_info->first_dt == vect_external_def
472 || oprnd_info->first_dt == vect_constant_def)
473 && (dt == vect_external_def
474 || dt == vect_constant_def)))
475 || !types_compatible_p (oprnd_info->first_op_type, type)
476 || (!STMT_VINFO_DATA_REF (stmt_info)
477 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
478 && ((!def_stmt_info
479 || STMT_VINFO_DATA_REF (def_stmt_info)
480 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
481 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
482 != (oprnd_info->first_dt != vect_reduction_def))))
483 {
484 /* Try swapping operands if we got a mismatch. */
485 if (i == commutative_op && !swapped)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "trying swapped operands\n");
490 swapped = true;
491 goto again;
492 }
493
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
496 "Build SLP failed: different types\n");
497
498 return 1;
499 }
500 if ((dt == vect_constant_def
501 || dt == vect_external_def)
502 && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
503 && (TREE_CODE (type) == BOOLEAN_TYPE
504 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
505 type)))
506 {
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 "Build SLP failed: invalid type of def "
510 "for variable-length SLP %T\n", oprnd);
511 return -1;
512 }
513 }
514
515 /* Check the types of the definitions. */
516 switch (dt)
517 {
518 case vect_external_def:
519 /* Make sure to demote the overall operand to external. */
520 oprnd_info->first_dt = vect_external_def;
521 /* Fallthru. */
522 case vect_constant_def:
523 oprnd_info->def_stmts.quick_push (NULL);
524 oprnd_info->ops.quick_push (oprnd);
525 break;
526
527 case vect_internal_def:
528 case vect_reduction_def:
529 if (oprnd_info->first_dt == vect_reduction_def
530 && !STMT_VINFO_DATA_REF (stmt_info)
531 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
532 && !STMT_VINFO_DATA_REF (def_stmt_info)
533 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
534 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
535 {
536 /* For an SLP reduction chain we want to duplicate the
537 reduction to each of the chain members. That gets
538 us a sane SLP graph (though the stmts are not 100%
539 correct wrt the initial values). */
540 gcc_assert (!first);
541 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
542 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
543 break;
544 }
545 /* Fallthru. */
546 case vect_induction_def:
547 oprnd_info->def_stmts.quick_push (def_stmt_info);
548 oprnd_info->ops.quick_push (oprnd);
549 break;
550
551 default:
552 /* FORNOW: Not supported. */
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
555 "Build SLP failed: illegal type of def %T\n",
556 oprnd);
557
558 return -1;
559 }
560 }
561
562 /* Swap operands. */
563 if (swapped)
564 {
565 if (first_op_cond)
566 {
567 /* If there are already uses of this stmt in an SLP instance then
568 we've committed to the operand order and can't swap it. */
569 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
570 {
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
573 "Build SLP failed: cannot swap operands of "
574 "shared stmt %G", stmt_info->stmt);
575 return -1;
576 }
577
578 /* To get rid of this swapping we have to move the stmt code
579 to the SLP tree as well (and gather it here per stmt). */
580 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
581 tree cond = gimple_assign_rhs1 (stmt);
582 enum tree_code code = TREE_CODE (cond);
583
584 /* Swap. */
585 if (*swap == 1)
586 {
587 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
588 &TREE_OPERAND (cond, 1));
589 TREE_SET_CODE (cond, swap_tree_comparison (code));
590 }
591 /* Invert. */
592 else
593 {
594 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
595 gimple_assign_rhs3_ptr (stmt));
596 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
597 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
598 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
599 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
600 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
601 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
602 gcc_assert (code != ERROR_MARK);
603 TREE_SET_CODE (cond, code);
604 }
605 }
606 else
607 {
608 /* For commutative ops nothing needs to change in the stmt
609 itself; the swapped operands live in the SLP tree. */
610 }
611 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE, vect_location,
613 "swapped operands to match def types in %G",
614 stmt_info->stmt);
615 }
616
617 *swap = swapped;
618 return 0;
619 }
620
621 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
622 Return true if we can, meaning that this choice doesn't conflict with
623 existing SLP nodes that use STMT_INFO. */
624
625 static bool
626 vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
627 {
628 tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
629 if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
630 return true;
631
632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
633 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
634 {
635 /* We maintain the invariant that if any statement in the group is
636 used, all other members of the group have the same vector type. */
637 stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
638 stmt_vec_info member_info = first_info;
639 for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
640 if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
641 || is_pattern_stmt_p (member_info))
642 break;
643
644 if (!member_info)
645 {
646 for (member_info = first_info; member_info;
647 member_info = DR_GROUP_NEXT_ELEMENT (member_info))
648 STMT_VINFO_VECTYPE (member_info) = vectype;
649 return true;
650 }
651 }
652 else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
653 && !is_pattern_stmt_p (stmt_info))
654 {
655 STMT_VINFO_VECTYPE (stmt_info) = vectype;
656 return true;
657 }
658
659 if (dump_enabled_p ())
660 {
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
662 "Build SLP failed: incompatible vector"
663 " types for: %G", stmt_info->stmt);
664 dump_printf_loc (MSG_NOTE, vect_location,
665 " old vector type: %T\n", old_vectype);
666 dump_printf_loc (MSG_NOTE, vect_location,
667 " new vector type: %T\n", vectype);
668 }
669 return false;
670 }
671
672 /* Try to infer and assign a vector type to all the statements in STMTS.
673 Used only for BB vectorization. */
674
675 static bool
676 vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
677 {
678 tree vectype, nunits_vectype;
679 if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
680 &nunits_vectype, stmts.length ()))
681 return false;
682
683 stmt_vec_info stmt_info;
684 unsigned int i;
685 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
686 if (!vect_update_shared_vectype (stmt_info, vectype))
687 return false;
688
689 return true;
690 }
691
692 /* Return true if call statements CALL1 and CALL2 are similar enough
693 to be combined into the same SLP group. */
694
695 static bool
696 compatible_calls_p (gcall *call1, gcall *call2)
697 {
698 unsigned int nargs = gimple_call_num_args (call1);
699 if (nargs != gimple_call_num_args (call2))
700 return false;
701
702 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
703 return false;
704
705 if (gimple_call_internal_p (call1))
706 {
707 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
708 TREE_TYPE (gimple_call_lhs (call2))))
709 return false;
710 for (unsigned int i = 0; i < nargs; ++i)
711 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
712 TREE_TYPE (gimple_call_arg (call2, i))))
713 return false;
714 }
715 else
716 {
717 if (!operand_equal_p (gimple_call_fn (call1),
718 gimple_call_fn (call2), 0))
719 return false;
720
721 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
722 return false;
723 }
724 return true;
725 }
726
727 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
728 caller's attempt to find the vector type in STMT_INFO with the narrowest
729 element type. Return true if VECTYPE is nonnull and if it is valid
730 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
731 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
732 vect_build_slp_tree. */
733
734 static bool
735 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
736 tree vectype, poly_uint64 *max_nunits)
737 {
738 if (!vectype)
739 {
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: unsupported data-type in %G\n",
743 stmt_info->stmt);
744 /* Fatal mismatch. */
745 return false;
746 }
747
748 /* If populating the vector type requires unrolling then fail
749 before adjusting *max_nunits for basic-block vectorization. */
750 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
751 unsigned HOST_WIDE_INT const_nunits;
752 if (STMT_VINFO_BB_VINFO (stmt_info)
753 && (!nunits.is_constant (&const_nunits)
754 || const_nunits > group_size))
755 {
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
758 "Build SLP failed: unrolling required "
759 "in basic block SLP\n");
760 /* Fatal mismatch. */
761 return false;
762 }
763
764 /* In case of multiple types we need to detect the smallest type. */
765 vect_update_max_nunits (max_nunits, vectype);
766 return true;
767 }
768
769 /* STMTS is a group of GROUP_SIZE SLP statements in which some
770 statements do the same operation as the first statement and in which
771 the others do ALT_STMT_CODE. Return true if we can take one vector
772 of the first operation and one vector of the second and permute them
773 to get the required result. VECTYPE is the type of the vector that
774 would be permuted. */
775
776 static bool
777 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
778 unsigned int group_size, tree vectype,
779 tree_code alt_stmt_code)
780 {
781 unsigned HOST_WIDE_INT count;
782 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
783 return false;
784
785 vec_perm_builder sel (count, count, 1);
786 for (unsigned int i = 0; i < count; ++i)
787 {
788 unsigned int elt = i;
789 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
790 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
791 elt += count;
792 sel.quick_push (elt);
793 }
794 vec_perm_indices indices (sel, 2, count);
795 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
796 }
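
/* Editor's note: a worked example for the selector built above.  With
   group_size == 2, first code PLUS_EXPR, alt code MINUS_EXPR and
   count == 4 lanes, the loop produces sel = { 0, 5, 2, 7 }: even
   lanes come from the all-PLUS vector, odd lanes from the all-MINUS
   vector, which is exactly the blend needed for an addsub-style group
   { a0+b0, a1-b1, a2+b2, a3-b3 }.  */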
797
798 /* Verify that the scalar stmts STMTS are isomorphic, do not require
799 data permutation and are not of unsupported types of operation.
800 Return true if so, otherwise return false and indicate in *MATCHES
801 which stmts are not isomorphic to the first one. If MATCHES[0]
802 is false then this indicates the comparison could not be
803 carried out or the stmts will never be vectorized by SLP.
804
805 Note COND_EXPR is possibly isomorphic to another one after swapping its
806 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
807 the first stmt by swapping the two operands of comparison; set SWAP[i]
808 to 2 if stmt I is isormorphic to the first stmt by inverting the code
809 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
810 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
811
812 static bool
813 vect_build_slp_tree_1 (unsigned char *swap,
814 vec<stmt_vec_info> stmts, unsigned int group_size,
815 poly_uint64 *max_nunits, bool *matches,
816 bool *two_operators)
817 {
818 unsigned int i;
819 stmt_vec_info first_stmt_info = stmts[0];
820 enum tree_code first_stmt_code = ERROR_MARK;
821 enum tree_code alt_stmt_code = ERROR_MARK;
822 enum tree_code rhs_code = ERROR_MARK;
823 enum tree_code first_cond_code = ERROR_MARK;
824 tree lhs;
825 bool need_same_oprnds = false;
826 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
827 optab optab;
828 int icode;
829 machine_mode optab_op2_mode;
830 machine_mode vec_mode;
831 stmt_vec_info first_load = NULL, prev_first_load = NULL;
832 bool load_p = false;
833
834 /* For every stmt in STMTS find its def stmt/s. */
835 stmt_vec_info stmt_info;
836 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
837 {
838 vec_info *vinfo = stmt_info->vinfo;
839 gimple *stmt = stmt_info->stmt;
840 swap[i] = 0;
841 matches[i] = false;
842
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
845
846 /* Fail to vectorize statements marked as unvectorizable. */
847 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
848 {
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
851 "Build SLP failed: unvectorizable statement %G",
852 stmt);
853 /* Fatal mismatch. */
854 matches[0] = false;
855 return false;
856 }
857
858 lhs = gimple_get_lhs (stmt);
859 if (lhs == NULL_TREE)
860 {
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
863 "Build SLP failed: not GIMPLE_ASSIGN nor "
864 "GIMPLE_CALL %G", stmt);
865 /* Fatal mismatch. */
866 matches[0] = false;
867 return false;
868 }
869
870 tree nunits_vectype;
871 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
872 &nunits_vectype, group_size)
873 || (nunits_vectype
874 && !vect_record_max_nunits (stmt_info, group_size,
875 nunits_vectype, max_nunits)))
876 {
877 /* Fatal mismatch. */
878 matches[0] = false;
879 return false;
880 }
881
882 gcc_assert (vectype);
883
884 if (is_a <bb_vec_info> (vinfo)
885 && !vect_update_shared_vectype (stmt_info, vectype))
886 continue;
887
888 gcall *call_stmt = dyn_cast <gcall *> (stmt);
889 if (call_stmt)
890 {
891 rhs_code = CALL_EXPR;
892
893 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
894 load_p = true;
895 else if ((gimple_call_internal_p (call_stmt)
896 && (!vectorizable_internal_fn_p
897 (gimple_call_internal_fn (call_stmt))))
898 || gimple_call_tail_p (call_stmt)
899 || gimple_call_noreturn_p (call_stmt)
900 || !gimple_call_nothrow_p (call_stmt)
901 || gimple_call_chain (call_stmt))
902 {
903 if (dump_enabled_p ())
904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
905 "Build SLP failed: unsupported call type %G",
906 call_stmt);
907 /* Fatal mismatch. */
908 matches[0] = false;
909 return false;
910 }
911 }
912 else
913 {
914 rhs_code = gimple_assign_rhs_code (stmt);
915 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
916 }
917
918 /* Check the operation. */
919 if (i == 0)
920 {
921 first_stmt_code = rhs_code;
922
923 /* Shift arguments should be equal in all the packed stmts for a
924 vector shift with scalar shift operand. */
925 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
926 || rhs_code == LROTATE_EXPR
927 || rhs_code == RROTATE_EXPR)
928 {
929 vec_mode = TYPE_MODE (vectype);
930
931 /* First see if we have a vector/vector shift. */
932 optab = optab_for_tree_code (rhs_code, vectype,
933 optab_vector);
934
935 if (!optab
936 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
937 {
938 /* No vector/vector shift, try for a vector/scalar shift. */
939 optab = optab_for_tree_code (rhs_code, vectype,
940 optab_scalar);
941
942 if (!optab)
943 {
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
946 "Build SLP failed: no optab.\n");
947 /* Fatal mismatch. */
948 matches[0] = false;
949 return false;
950 }
951 icode = (int) optab_handler (optab, vec_mode);
952 if (icode == CODE_FOR_nothing)
953 {
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
956 "Build SLP failed: "
957 "op not supported by target.\n");
958 /* Fatal mismatch. */
959 matches[0] = false;
960 return false;
961 }
962 optab_op2_mode = insn_data[icode].operand[2].mode;
963 if (!VECTOR_MODE_P (optab_op2_mode))
964 {
965 need_same_oprnds = true;
966 first_op1 = gimple_assign_rhs2 (stmt);
967 }
968 }
969 }
970 else if (rhs_code == WIDEN_LSHIFT_EXPR)
971 {
972 need_same_oprnds = true;
973 first_op1 = gimple_assign_rhs2 (stmt);
974 }
975 else if (call_stmt
976 && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
977 {
978 need_same_oprnds = true;
979 first_op1 = gimple_call_arg (call_stmt, 1);
980 }
981 }
982 else
983 {
984 if (first_stmt_code != rhs_code
985 && alt_stmt_code == ERROR_MARK)
986 alt_stmt_code = rhs_code;
987 if (first_stmt_code != rhs_code
988 && (first_stmt_code != IMAGPART_EXPR
989 || rhs_code != REALPART_EXPR)
990 && (first_stmt_code != REALPART_EXPR
991 || rhs_code != IMAGPART_EXPR)
992 /* Handle mismatches in plus/minus by computing both
993 and merging the results. */
994 && !((first_stmt_code == PLUS_EXPR
995 || first_stmt_code == MINUS_EXPR)
996 && (alt_stmt_code == PLUS_EXPR
997 || alt_stmt_code == MINUS_EXPR)
998 && rhs_code == alt_stmt_code)
999 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
1000 && (first_stmt_code == ARRAY_REF
1001 || first_stmt_code == BIT_FIELD_REF
1002 || first_stmt_code == INDIRECT_REF
1003 || first_stmt_code == COMPONENT_REF
1004 || first_stmt_code == MEM_REF)))
1005 {
1006 if (dump_enabled_p ())
1007 {
1008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1009 "Build SLP failed: different operation "
1010 "in stmt %G", stmt);
1011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1012 "original stmt %G", first_stmt_info->stmt);
1013 }
1014 /* Mismatch. */
1015 continue;
1016 }
1017
1018 if (need_same_oprnds)
1019 {
1020 tree other_op1 = (call_stmt
1021 ? gimple_call_arg (call_stmt, 1)
1022 : gimple_assign_rhs2 (stmt));
1023 if (!operand_equal_p (first_op1, other_op1, 0))
1024 {
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1027 "Build SLP failed: different shift "
1028 "arguments in %G", stmt);
1029 /* Mismatch. */
1030 continue;
1031 }
1032 }
1033
1034 if (!load_p && rhs_code == CALL_EXPR)
1035 {
1036 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
1037 as_a <gcall *> (stmt)))
1038 {
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1041 "Build SLP failed: different calls in %G",
1042 stmt);
1043 /* Mismatch. */
1044 continue;
1045 }
1046 }
1047 }
1048
1049 /* Grouped store or load. */
1050 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1051 {
1052 if (REFERENCE_CLASS_P (lhs))
1053 {
1054 /* Store. */
1055 ;
1056 }
1057 else
1058 {
1059 /* Load. */
1060 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
1061 if (prev_first_load)
1062 {
1063 /* Check that there are no loads from different interleaving
1064 chains in the same node. */
1065 if (prev_first_load != first_load)
1066 {
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1069 vect_location,
1070 "Build SLP failed: different "
1071 "interleaving chains in one node %G",
1072 stmt);
1073 /* Mismatch. */
1074 continue;
1075 }
1076 }
1077 else
1078 prev_first_load = first_load;
1079 }
1080 } /* Grouped access. */
1081 else
1082 {
1083 if (load_p)
1084 {
1085 /* Not a grouped load. */
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1088 "Build SLP failed: not grouped load %G", stmt);
1089
1090 /* FORNOW: Non-grouped loads are not supported. */
1091 /* Fatal mismatch. */
1092 matches[0] = false;
1093 return false;
1094 }
1095
1096 /* Not a memory operation. */
1097 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
1098 && TREE_CODE_CLASS (rhs_code) != tcc_unary
1099 && TREE_CODE_CLASS (rhs_code) != tcc_expression
1100 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1101 && rhs_code != VIEW_CONVERT_EXPR
1102 && rhs_code != CALL_EXPR)
1103 {
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "Build SLP failed: operation unsupported %G",
1107 stmt);
1108 /* Fatal mismatch. */
1109 matches[0] = false;
1110 return false;
1111 }
1112
1113 if (rhs_code == COND_EXPR)
1114 {
1115 tree cond_expr = gimple_assign_rhs1 (stmt);
1116 enum tree_code cond_code = TREE_CODE (cond_expr);
1117 enum tree_code swap_code = ERROR_MARK;
1118 enum tree_code invert_code = ERROR_MARK;
1119
1120 if (i == 0)
1121 first_cond_code = TREE_CODE (cond_expr);
1122 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1123 {
1124 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1125 swap_code = swap_tree_comparison (cond_code);
1126 invert_code = invert_tree_comparison (cond_code, honor_nans);
1127 }
1128
1129 if (first_cond_code == cond_code)
1130 ;
1131 /* Isomorphic can be achieved by swapping. */
1132 else if (first_cond_code == swap_code)
1133 swap[i] = 1;
1134 /* Isomorphic can be achieved by inverting. */
1135 else if (first_cond_code == invert_code)
1136 swap[i] = 2;
1137 else
1138 {
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1141 "Build SLP failed: different"
1142 " operation %G", stmt);
1143 /* Mismatch. */
1144 continue;
1145 }
1146 }
1147 }
1148
1149 matches[i] = true;
1150 }
1151
1152 for (i = 0; i < group_size; ++i)
1153 if (!matches[i])
1154 return false;
1155
1156 /* If we allowed a two-operation SLP node verify the target can cope
1157 with the permute we are going to use. */
1158 if (alt_stmt_code != ERROR_MARK
1159 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1160 {
1161 if (!vect_two_operations_perm_ok_p (stmts, group_size,
1162 vectype, alt_stmt_code))
1163 {
1164 for (i = 0; i < group_size; ++i)
1165 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1166 {
1167 matches[i] = false;
1168 if (dump_enabled_p ())
1169 {
1170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1171 "Build SLP failed: different operation "
1172 "in stmt %G", stmts[i]->stmt);
1173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1174 "original stmt %G", first_stmt_info->stmt);
1175 }
1176 }
1177 return false;
1178 }
1179 *two_operators = true;
1180 }
1181
1182 return true;
1183 }
1184
1185 /* Traits for the hash_map used to cache SLP build results for a stmt set.
1186 Note we never remove apart from at destruction time so we do not
1187 need a special value for deleted that differs from empty. */
1188 struct bst_traits
1189 {
1190 typedef vec <stmt_vec_info> value_type;
1191 typedef vec <stmt_vec_info> compare_type;
1192 static inline hashval_t hash (value_type);
1193 static inline bool equal (value_type existing, value_type candidate);
1194 static inline bool is_empty (value_type x) { return !x.exists (); }
1195 static inline bool is_deleted (value_type x) { return !x.exists (); }
1196 static const bool empty_zero_p = true;
1197 static inline void mark_empty (value_type &x) { x.release (); }
1198 static inline void mark_deleted (value_type &x) { x.release (); }
1199 static inline void remove (value_type &x) { x.release (); }
1200 };
1201 inline hashval_t
1202 bst_traits::hash (value_type x)
1203 {
1204 inchash::hash h;
1205 for (unsigned i = 0; i < x.length (); ++i)
1206 h.add_int (gimple_uid (x[i]->stmt));
1207 return h.end ();
1208 }
1209 inline bool
1210 bst_traits::equal (value_type existing, value_type candidate)
1211 {
1212 if (existing.length () != candidate.length ())
1213 return false;
1214 for (unsigned i = 0; i < existing.length (); ++i)
1215 if (existing[i] != candidate[i])
1216 return false;
1217 return true;
1218 }
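
/* Editor's note: a standalone sketch (not GCC code) of the hashing
   idea above, assuming each stmt is identified by an integer uid as
   gimple_uid provides:

     static unsigned
     hash_stmt_seq (const unsigned *uids, unsigned n)
     {
       unsigned h = 0;
       for (unsigned i = 0; i < n; ++i)
         h = h * 0x9e3779b9u + uids[i];   /* order-sensitive mix */
       return h;
     }

   Order sensitivity matters: { s0, s1 } and { s1, s0 } are distinct
   SLP candidates, so the hash should distinguish permutations.  */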
1219
1220 typedef hash_map <vec <gimple *>, slp_tree,
1221 simple_hashmap_traits <bst_traits, slp_tree> >
1222 scalar_stmts_to_slp_tree_map_t;
1223
1224 static slp_tree
1225 vect_build_slp_tree_2 (vec_info *vinfo,
1226 vec<stmt_vec_info> stmts, unsigned int group_size,
1227 poly_uint64 *max_nunits,
1228 bool *matches, unsigned *npermutes, unsigned *tree_size,
1229 scalar_stmts_to_slp_tree_map_t *bst_map);
1230
1231 static slp_tree
1232 vect_build_slp_tree (vec_info *vinfo,
1233 vec<stmt_vec_info> stmts, unsigned int group_size,
1234 poly_uint64 *max_nunits,
1235 bool *matches, unsigned *npermutes, unsigned *tree_size,
1236 scalar_stmts_to_slp_tree_map_t *bst_map)
1237 {
1238 if (slp_tree *leader = bst_map->get (stmts))
1239 {
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1242 *leader ? "" : "failed ", *leader);
1243 if (*leader)
1244 {
1245 (*leader)->refcnt++;
1246 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1247 }
1248 return *leader;
1249 }
1250 poly_uint64 this_max_nunits = 1;
1251 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
1252 &this_max_nunits,
1253 matches, npermutes, tree_size, bst_map);
1254 if (res)
1255 {
1256 res->max_nunits = this_max_nunits;
1257 vect_update_max_nunits (max_nunits, this_max_nunits);
1258 /* Keep a reference for the bst_map use. */
1259 res->refcnt++;
1260 }
1261 bst_map->put (stmts.copy (), res);
1262 return res;
1263 }
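
/* Editor's note: the wrapper above is a memoization layer over
   vect_build_slp_tree_2.  In outline:

     if the stmt sequence is in BST_MAP,
       return the cached tree (bumping its refcnt if non-NULL);
     build the tree, record it in BST_MAP keyed by a copy of STMTS;
     return it.

   Caching failures (NULL entries) is as important as caching
   successes: it avoids re-analyzing the same non-vectorizable stmt
   set from every parent that reaches it.  */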
1264
1265 /* Recursively build an SLP tree for the group of stmts STMTS.
1266 Fail (and return NULL) if the def-stmts are not isomorphic,
1267 require data permutation or are of unsupported types of
1268 operation; in that case *MATCHES indicates which stmts
1269 failed to match the first one. Otherwise return the
1270 built SLP tree. */
1271
1272 static slp_tree
1273 vect_build_slp_tree_2 (vec_info *vinfo,
1274 vec<stmt_vec_info> stmts, unsigned int group_size,
1275 poly_uint64 *max_nunits,
1276 bool *matches, unsigned *npermutes, unsigned *tree_size,
1277 scalar_stmts_to_slp_tree_map_t *bst_map)
1278 {
1279 unsigned nops, i, this_tree_size = 0;
1280 poly_uint64 this_max_nunits = *max_nunits;
1281 slp_tree node;
1282
1283 matches[0] = false;
1284
1285 stmt_vec_info stmt_info = stmts[0];
1286 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1287 nops = gimple_call_num_args (stmt);
1288 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1289 {
1290 nops = gimple_num_ops (stmt) - 1;
1291 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1292 nops++;
1293 }
1294 else if (is_a <gphi *> (stmt_info->stmt))
1295 nops = 0;
1296 else
1297 return NULL;
1298
1299 /* If the SLP node is a PHI (induction or reduction), terminate
1300 the recursion. */
1301 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1302 {
1303 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1304 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
1305 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1306 return NULL;
1307
1308 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1309 /* Induction from different IVs is not supported. */
1310 if (def_type == vect_induction_def)
1311 {
1312 stmt_vec_info other_info;
1313 FOR_EACH_VEC_ELT (stmts, i, other_info)
1314 if (stmt_info != other_info)
1315 return NULL;
1316 }
1317 else if (def_type == vect_reduction_def
1318 || def_type == vect_double_reduction_def
1319 || def_type == vect_nested_cycle)
1320 {
1321 /* Else def types have to match. */
1322 stmt_vec_info other_info;
1323 FOR_EACH_VEC_ELT (stmts, i, other_info)
1324 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1325 return NULL;
1326 }
1327 else
1328 return NULL;
1329 (*tree_size)++;
1330 node = vect_create_new_slp_node (stmts);
1331 return node;
1332 }
1333
1334
1335 bool two_operators = false;
1336 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1337 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1338 &this_max_nunits, matches, &two_operators))
1339 return NULL;
1340
1341 /* If the SLP node is a load, terminate the recursion unless masked. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1343 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1344 {
1345 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1346 {
1347 /* Masked load. */
1348 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1349 nops = 1;
1350 }
1351 else
1352 {
1353 *max_nunits = this_max_nunits;
1354 (*tree_size)++;
1355 node = vect_create_new_slp_node (stmts);
1356 /* And compute the load permutation. Whether it is actually
1357 a permutation depends on the unrolling factor which is
1358 decided later. */
1359 vec<unsigned> load_permutation;
1360 int j;
1361 stmt_vec_info load_info;
1362 load_permutation.create (group_size);
1363 stmt_vec_info first_stmt_info
1364 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
1365 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1366 {
1367 int load_place = vect_get_place_in_interleaving_chain
1368 (load_info, first_stmt_info);
1369 gcc_assert (load_place != -1);
1370 load_permutation.safe_push (load_place);
1371 }
1372 SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
1373 return node;
1374 }
1375 }
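
/* Editor's note: a worked example of the load permutation recorded
   above.  For an interleaving chain starting at a[0] and SLP scalar
   stmts loading { a[2], a[0], a[3], a[1] }, the permutation is
   { 2, 0, 3, 1 }: entry J is the place in the chain of the J-th
   scalar stmt of the node.  */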
1376
1377 /* Get at the operands, verifying they are compatible. */
1378 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1379 slp_oprnd_info oprnd_info;
1380 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1381 {
1382 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1383 stmts, i, &oprnds_info);
1384 if (res != 0)
1385 matches[(res == -1) ? 0 : i] = false;
1386 if (!matches[0])
1387 break;
1388 }
1389 for (i = 0; i < group_size; ++i)
1390 if (!matches[i])
1391 {
1392 vect_free_oprnd_info (oprnds_info);
1393 return NULL;
1394 }
1395
1396 auto_vec<slp_tree, 4> children;
1397
1398 stmt_info = stmts[0];
1399
1400 /* Create SLP_TREE nodes for the definition node/s. */
1401 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1402 {
1403 slp_tree child;
1404 unsigned old_tree_size = this_tree_size;
1405 unsigned int j;
1406
1407 if (oprnd_info->first_dt == vect_uninitialized_def)
1408 {
1409 /* COND_EXPRs have one operand slot too many here if the
1410 condition is an SSA name. */
1411 gcc_assert (i == 3 && nops == 4);
1412 continue;
1413 }
1414
1415 if (oprnd_info->first_dt != vect_internal_def
1416 && oprnd_info->first_dt != vect_reduction_def
1417 && oprnd_info->first_dt != vect_induction_def)
1418 {
1419 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1420 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1421 oprnd_info->ops = vNULL;
1422 children.safe_push (invnode);
1423 continue;
1424 }
1425
1426 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1427 group_size, &this_max_nunits,
1428 matches, npermutes,
1429 &this_tree_size, bst_map)) != NULL)
1430 {
1431 /* If we have all children of a non-unary child built up from
1432 scalars then just throw that away and build this node up
1433 from scalars. */
1434 if (is_a <bb_vec_info> (vinfo)
1435 && SLP_TREE_CHILDREN (child).length () > 1
1436 /* ??? Rejecting patterns this way doesn't work. We'd have to
1437 do extra work to cancel the pattern so the uses see the
1438 scalar version. */
1439 && !oprnd_info->any_pattern)
1440 {
1441 slp_tree grandchild;
1442
1443 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1444 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1445 break;
1446 if (!grandchild
1447 && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
1448 {
1449 /* Roll back. */
1450 this_tree_size = old_tree_size;
1451 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1452 vect_free_slp_tree (grandchild, false);
1453 SLP_TREE_CHILDREN (child).truncate (0);
1454
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_NOTE, vect_location,
1457 "Building parent vector operands from "
1458 "scalars instead\n");
1459 oprnd_info->def_stmts = vNULL;
1460 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1461 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1462 oprnd_info->ops = vNULL;
1463 ++this_tree_size;
1464 children.safe_push (child);
1465 continue;
1466 }
1467 }
1468
1469 oprnd_info->def_stmts = vNULL;
1470 children.safe_push (child);
1471 continue;
1472 }
1473
1474 /* If the SLP build failed fatally and we are analyzing a basic-block
1475 then simply treat nodes we fail to build as externally defined
1476 (and thus build vectors from the scalar defs).
1477 The cost model will reject outright expensive cases.
1478 ??? This doesn't treat cases where permutation ultimately
1479 fails (or we don't try permutation below). Ideally we'd
1480 even compute a permutation that will end up with the maximum
1481 SLP tree size... */
1482 if (is_a <bb_vec_info> (vinfo)
1483 && !matches[0]
1484 /* ??? Rejecting patterns this way doesn't work. We'd have to
1485 do extra work to cancel the pattern so the uses see the
1486 scalar version. */
1487 && !is_pattern_stmt_p (stmt_info)
1488 && !oprnd_info->any_pattern
1489 && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
1490 {
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_NOTE, vect_location,
1493 "Building vector operands from scalars\n");
1494 this_tree_size++;
1495 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1496 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1497 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1498 children.safe_push (child);
1499 oprnd_info->ops = vNULL;
1500 oprnd_info->def_stmts = vNULL;
1501 continue;
1502 }
1503
1504 /* If the SLP build for operand zero failed and operand zero
1505 and one can be commuted, try that for the scalar stmts
1506 that failed the match. */
1507 if (i == 0
1508 /* A first scalar stmt mismatch signals a fatal mismatch. */
1509 && matches[0]
1510 /* ??? For COND_EXPRs we can swap the comparison operands
1511 as well as the arms under some constraints. */
1512 && nops == 2
1513 && oprnds_info[1]->first_dt == vect_internal_def
1514 && is_gimple_assign (stmt_info->stmt)
1515 /* Swapping operands for reductions breaks assumptions later on. */
1516 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1517 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
1518 /* Do so only if the number of unsuccessful permutes was no more
1519 than a cut-off, as re-trying the recursive match on
1520 possibly each level of the tree would expose exponential
1521 behavior. */
1522 && *npermutes < 4)
1523 {
1524 /* See whether we can swap the matching or the non-matching
1525 stmt operands. */
1526 bool swap_not_matching = true;
1527 do
1528 {
1529 for (j = 0; j < group_size; ++j)
1530 {
1531 if (matches[j] != !swap_not_matching)
1532 continue;
1533 stmt_vec_info stmt_info = stmts[j];
1534 /* Verify if we can swap operands of this stmt. */
1535 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1536 if (!stmt
1537 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1538 {
1539 if (!swap_not_matching)
1540 goto fail;
1541 swap_not_matching = false;
1542 break;
1543 }
1544 }
1545 }
1546 while (j != group_size);
1547
1548 /* Swap mismatched definition stmts. */
1549 if (dump_enabled_p ())
1550 dump_printf_loc (MSG_NOTE, vect_location,
1551 "Re-trying with swapped operands of stmts ");
1552 for (j = 0; j < group_size; ++j)
1553 if (matches[j] == !swap_not_matching)
1554 {
1555 std::swap (oprnds_info[0]->def_stmts[j],
1556 oprnds_info[1]->def_stmts[j]);
1557 std::swap (oprnds_info[0]->ops[j],
1558 oprnds_info[1]->ops[j]);
1559 if (dump_enabled_p ())
1560 dump_printf (MSG_NOTE, "%d ", j);
1561 }
1562 if (dump_enabled_p ())
1563 dump_printf (MSG_NOTE, "\n");
1564 /* And try again with scratch 'matches' ... */
1565 bool *tem = XALLOCAVEC (bool, group_size);
1566 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1567 group_size, &this_max_nunits,
1568 tem, npermutes,
1569 &this_tree_size, bst_map)) != NULL)
1570 {
1571 /* If we have all children of a non-unary child built up from
1572 scalars then just throw that away and build this node up
1573 from scalars. */
1574 if (is_a <bb_vec_info> (vinfo)
1575 && SLP_TREE_CHILDREN (child).length () > 1
1576 /* ??? Rejecting patterns this way doesn't work. We'd have
1577 to do extra work to cancel the pattern so the uses see the
1578 scalar version. */
1579 && !oprnd_info->any_pattern)
1580 {
1581 unsigned int j;
1582 slp_tree grandchild;
1583
1584 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1585 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1586 break;
1587 if (!grandchild
1588 && (vect_update_all_shared_vectypes
1589 (oprnd_info->def_stmts)))
1590 {
1591 /* Roll back. */
1592 this_tree_size = old_tree_size;
1593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1594 vect_free_slp_tree (grandchild, false);
1595 SLP_TREE_CHILDREN (child).truncate (0);
1596
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location,
1599 "Building parent vector operands from "
1600 "scalars instead\n");
1601 oprnd_info->def_stmts = vNULL;
1602 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1603 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1604 oprnd_info->ops = vNULL;
1605 ++this_tree_size;
1606 children.safe_push (child);
1607 continue;
1608 }
1609 }
1610
1611 oprnd_info->def_stmts = vNULL;
1612 children.safe_push (child);
1613 continue;
1614 }
1615
1616 ++*npermutes;
1617 }
1618
1619 fail:
1620 gcc_assert (child == NULL);
1621 FOR_EACH_VEC_ELT (children, j, child)
1622 vect_free_slp_tree (child, false);
1623 vect_free_oprnd_info (oprnds_info);
1624 return NULL;
1625 }
1626
1627 vect_free_oprnd_info (oprnds_info);
1628
1629 *tree_size += this_tree_size + 1;
1630 *max_nunits = this_max_nunits;
1631
1632 node = vect_create_new_slp_node (stmts);
1633 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1634 SLP_TREE_CHILDREN (node).splice (children);
1635 return node;
1636 }
1637
1638 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1639
1640 static void
1641 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1642 slp_tree node, hash_set<slp_tree> &visited)
1643 {
1644 unsigned i;
1645 stmt_vec_info stmt_info;
1646 slp_tree child;
1647 tree op;
1648
1649 if (visited.add (node))
1650 return;
1651
1652 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1653 dump_user_location_t user_loc = loc.get_user_location ();
1654 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
1655 SLP_TREE_DEF_TYPE (node) == vect_external_def
1656 ? " (external)"
1657 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1658 ? " (constant)"
1659 : ""), node,
1660 estimated_poly_value (node->max_nunits));
1661 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1662 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1663 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1664 else
1665 {
1666 dump_printf_loc (metadata, user_loc, "\t{ ");
1667 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1668 dump_printf (metadata, "%T%s ", op,
1669 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1670 dump_printf (metadata, "}\n");
1671 }
1672 if (SLP_TREE_CHILDREN (node).is_empty ())
1673 return;
1674 dump_printf_loc (metadata, user_loc, "\tchildren");
1675 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1676 dump_printf (dump_kind, " %p", (void *)child);
1677 dump_printf (dump_kind, "\n");
1678 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1679 vect_print_slp_tree (dump_kind, loc, child, visited);
1680 }
1681
1682 static void
1683 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1684 slp_tree node)
1685 {
1686 hash_set<slp_tree> visited;
1687 vect_print_slp_tree (dump_kind, loc, node, visited);
1688 }
1689
1690 /* Mark the tree rooted at NODE with PURE_SLP. */
1691
1692 static void
1693 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1694 {
1695 int i;
1696 stmt_vec_info stmt_info;
1697 slp_tree child;
1698
1699 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1700 return;
1701
1702 if (visited.add (node))
1703 return;
1704
1705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1706 STMT_SLP_TYPE (stmt_info) = pure_slp;
1707
1708 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1709 vect_mark_slp_stmts (child, visited);
1710 }
1711
1712 static void
1713 vect_mark_slp_stmts (slp_tree node)
1714 {
1715 hash_set<slp_tree> visited;
1716 vect_mark_slp_stmts (node, visited);
1717 }
1718
1719 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1720
1721 static void
1722 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1723 {
1724 int i;
1725 stmt_vec_info stmt_info;
1726 slp_tree child;
1727
1728 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1729 return;
1730
1731 if (visited.add (node))
1732 return;
1733
1734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1735 {
1736 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1737 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1738 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1739 }
1740
1741 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1742 vect_mark_slp_stmts_relevant (child, visited);
1743 }
1744
1745 static void
1746 vect_mark_slp_stmts_relevant (slp_tree node)
1747 {
1748 hash_set<slp_tree> visited;
1749 vect_mark_slp_stmts_relevant (node, visited);
1750 }
1751
1752 /* Copy the SLP subtree rooted at NODE. */
1753
1754 static slp_tree
1755 slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
1756 {
1757 unsigned i;
1758
1759 bool existed_p;
1760 slp_tree &copy = map.get_or_insert (node, &existed_p);
1761 if (existed_p)
1762 return copy;
1763
1764 copy = XNEW (_slp_tree);
1765 memcpy (copy, node, sizeof (_slp_tree));
1766 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1767 {
1768 SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
1769 stmt_vec_info stmt_info;
1770 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1771 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
1772 }
1773 if (SLP_TREE_SCALAR_OPS (node).exists ())
1774 SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
1775 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1776 SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
1777 if (SLP_TREE_CHILDREN (node).exists ())
1778 SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
1779 gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
1780 copy->refcnt = 0;
1781
1782 slp_tree child;
1783 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1784 {
1785 SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1786 SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1787 }
1788 return copy;
1789 }
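
/* Editor's note: a standalone sketch (not GCC code) of the memoized
   DAG copy above, reusing the toy node type from the earlier sketch
   plus a map from original node to copy:

     copy (n, map):
       if map contains n, return map[n];
       c = shallow copy of n; map[n] = c;
       for each i: c->child[i] = copy (n->child[i], map),
                   c->child[i]->refcnt++;
       return c;

   The map guarantees that subtrees shared in the original stay shared
   in the copy, so the result is an isomorphic DAG rather than an
   exponentially expanded tree.  */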
1790
1791 /* Rearrange the statements of NODE according to PERMUTATION. */
1792
1793 static void
1794 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1795 vec<unsigned> permutation,
1796 hash_set<slp_tree> &visited)
1797 {
1798 unsigned int i;
1799 slp_tree child;
1800
1801 if (visited.add (node))
1802 return;
1803
1804 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1805 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1806
1807 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1808 {
1809 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1810 vec<stmt_vec_info> tmp_stmts;
1811 tmp_stmts.create (group_size);
1812 tmp_stmts.quick_grow (group_size);
1813 stmt_vec_info stmt_info;
1814 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1815 tmp_stmts[permutation[i]] = stmt_info;
1816 SLP_TREE_SCALAR_STMTS (node).release ();
1817 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1818 }
1819 if (SLP_TREE_SCALAR_OPS (node).exists ())
1820 {
1821 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1822 vec<tree> tmp_ops;
1823 tmp_ops.create (group_size);
1824 tmp_ops.quick_grow (group_size);
1825 tree op;
1826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1827 tmp_ops[permutation[i]] = op;
1828 SLP_TREE_SCALAR_OPS (node).release ();
1829 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1830 }
1831 }
1832
1833
1834 /* Attempt to reorder stmts in a reduction chain so that we don't
1835 require any load permutation. Return true if that was possible,
1836 otherwise return false. */
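/* As an illustration, for group_size 4 this succeeds when every load
   node carries the same load permutation, say {2, 0, 3, 1}, which is a
   bijection on {0, ..., 3}: the whole tree is then rearranged by it and
   the load permutations dropped.  If a node lacks a permutation, uses a
   different one, or the permutation repeats or skips an index, we give
   up.  */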
1837
1838 static bool
1839 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1840 {
1841 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1842 unsigned int i, j;
1843 unsigned int lidx;
1844 slp_tree node, load;
1845
1846 /* Compare all the permutation sequences to the first one. We know
1847 that at least one load is permuted. */
1848 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1849 if (!node->load_permutation.exists ())
1850 return false;
1851 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1852 {
1853 if (!load->load_permutation.exists ())
1854 return false;
1855 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1856 if (lidx != node->load_permutation[j])
1857 return false;
1858 }
1859
1860 /* Check that the loads in the first sequence are different and there
1861 are no gaps between them. */
1862 auto_sbitmap load_index (group_size);
1863 bitmap_clear (load_index);
1864 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1865 {
1866 if (lidx >= group_size)
1867 return false;
1868 if (bitmap_bit_p (load_index, lidx))
1869 return false;
1870
1871 bitmap_set_bit (load_index, lidx);
1872 }
1873 for (i = 0; i < group_size; i++)
1874 if (!bitmap_bit_p (load_index, i))
1875 return false;
1876
1877 /* This permutation is valid for reduction. Since the order of the
1878 statements in the nodes is not important unless they are memory
1879 accesses, we can rearrange the statements in all the nodes
1880 according to the order of the loads. */
1881
1882 /* We have to unshare the SLP tree we modify. */
1883 hash_map<slp_tree, slp_tree> map;
1884 slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1885 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1886 unshared->refcnt++;
1887 SLP_INSTANCE_TREE (slp_instn) = unshared;
1888 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1889 SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1890 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1891
1892 /* Do the actual re-arrangement. */
1893 hash_set<slp_tree> visited;
1894 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1895 node->load_permutation, visited);
1896
1897 /* We are done, no actual permutations need to be generated. */
1898 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1899 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1900 {
1901 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1902 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1903 /* But we have to keep those permutations that are required because
1904 of handling of gaps. */
1905 if (known_eq (unrolling_factor, 1U)
1906 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1907 && DR_GROUP_GAP (first_stmt_info) == 0))
1908 SLP_TREE_LOAD_PERMUTATION (node).release ();
1909 else
1910 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1911 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1912 }
1913
1914 return true;
1915 }
1916
1917 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1918
1919 static void
1920 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1921 hash_set<slp_tree> &visited)
1922 {
1923 if (visited.add (node))
1924 return;
1925
1926 if (SLP_TREE_CHILDREN (node).length () == 0)
1927 {
1928 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1929 return;
1930 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1931 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1932 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1933 SLP_INSTANCE_LOADS (inst).safe_push (node);
1934 }
1935 else
1936 {
1937 unsigned i;
1938 slp_tree child;
1939 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1940 vect_gather_slp_loads (inst, child, visited);
1941 }
1942 }
1943
1944 static void
1945 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1946 {
1947 hash_set<slp_tree> visited;
1948 vect_gather_slp_loads (inst, node, visited);
1949 }
1950
1951 /* Check if the required load permutations in the SLP instance
1952 SLP_INSTN are supported. */
1953
1954 static bool
1955 vect_supported_load_permutation_p (slp_instance slp_instn)
1956 {
1957 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1958 unsigned int i, j, k, next;
1959 slp_tree node;
1960
1961 if (dump_enabled_p ())
1962 {
1963 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1964 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1965 if (node->load_permutation.exists ())
1966 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1967 dump_printf (MSG_NOTE, "%d ", next);
1968 else
1969 for (k = 0; k < group_size; ++k)
1970 dump_printf (MSG_NOTE, "%d ", k);
1971 dump_printf (MSG_NOTE, "\n");
1972 }
1973
1974 /* In case of reduction every load permutation is allowed, since the order
1975 of the reduction statements is not important (as opposed to the case of
1976 grouped stores). The only condition we need to check is that all the
1977 load nodes are of the same size and have the same permutation (and then
1978 rearrange all the nodes of the SLP instance according to this
1979 permutation). */
1980
1981 /* Check that all the load nodes are of the same size. */
1982 /* ??? Can't we assert this? */
1983 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1984 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1985 return false;
1986
1987 node = SLP_INSTANCE_TREE (slp_instn);
1988 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1989
1990 /* Reduction (there are no data-refs in the root).
1991 In a reduction chain the order of the loads is not important. */
1992 if (!STMT_VINFO_DATA_REF (stmt_info)
1993 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1994 vect_attempt_slp_rearrange_stmts (slp_instn);
1995
1996 /* In basic block vectorization we allow any subchain of an interleaving
1997 chain.
1998 FORNOW: not supported in loop SLP because of realignment complications. */
1999 if (STMT_VINFO_BB_VINFO (stmt_info))
2000 {
2001 /* Check whether the loads in an instance form a subchain and thus
2002 no permutation is necessary. */
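	 /* E.g. (illustrative) for an interleaving chain accessing
	    a[0], a[1], a[2], a[3], a load node whose scalar stmts are
	    a[1], a[2] in element order is a subchain: each stmt is the
	    DR_GROUP_NEXT_ELEMENT of the previous one with gap 1.  */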
2003 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2004 {
2005 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
2006 continue;
2007 bool subchain_p = true;
2008 stmt_vec_info next_load_info = NULL;
2009 stmt_vec_info load_info;
2010 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2011 {
2012 if (j != 0
2013 && (next_load_info != load_info
2014 || DR_GROUP_GAP (load_info) != 1))
2015 {
2016 subchain_p = false;
2017 break;
2018 }
2019 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
2020 }
2021 if (subchain_p)
2022 SLP_TREE_LOAD_PERMUTATION (node).release ();
2023 else
2024 {
2025 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
2026 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
2027 unsigned HOST_WIDE_INT nunits;
2028 unsigned k, maxk = 0;
2029 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
2030 if (k > maxk)
2031 maxk = k;
2032 /* In BB vectorization we may not actually use a loaded vector
2033 accessing elements in excess of DR_GROUP_SIZE. */
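	      /* Illustrative: with DR_GROUP_SIZE 6 and nunits 4 (in
		 practice nunits is a power of two, which the masking
		 below relies on), full vectors only cover elements
		 0..3 since 6 & ~3 == 4, so maxk >= 4 would touch the
		 partially loaded tail and is rejected.  */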
2034 tree vectype = STMT_VINFO_VECTYPE (group_info);
2035 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2036 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
2037 {
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2040 "BB vectorization with gaps at the end of "
2041 "a load is not supported\n");
2042 return false;
2043 }
2044
2045 /* Verify the permutation can be generated. */
2046 vec<tree> tem;
2047 unsigned n_perms;
2048 if (!vect_transform_slp_perm_load (node, tem, NULL,
2049 1, slp_instn, true, &n_perms))
2050 {
2051 if (dump_enabled_p ())
2052 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2053 vect_location,
2054 "unsupported load permutation\n");
2055 return false;
2056 }
2057 }
2058 }
2059 return true;
2060 }
2061
2062 /* For loop vectorization verify we can generate the permutation. Be
2063 conservative about the vectorization factor: there are permutations
2064 that will use three vector inputs only starting from a specific factor,
2065 and the vectorization factor is not yet final.
2066 ??? The SLP instance unrolling factor might not be the maximum one. */
2067 unsigned n_perms;
2068 poly_uint64 test_vf
2069 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
2070 LOOP_VINFO_VECT_FACTOR
2071 (STMT_VINFO_LOOP_VINFO (stmt_info)));
2072 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2073 if (node->load_permutation.exists ()
2074 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
2075 slp_instn, true, &n_perms))
2076 return false;
2077
2078 return true;
2079 }
2080
2081
2082 /* Find the last scalar stmt of SLP node NODE. */
2083
2084 stmt_vec_info
2085 vect_find_last_scalar_stmt_in_slp (slp_tree node)
2086 {
2087 stmt_vec_info last = NULL;
2088 stmt_vec_info stmt_vinfo;
2089
2090 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2091 {
2092 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
2093 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2094 }
2095
2096 return last;
2097 }
2098
2099 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2100 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2101 (also containing the first GROUP1_SIZE stmts, since stores are
2102 consecutive), the second containing the remainder.
2103 Return the first stmt in the second group. */
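/* For example (illustrative), splitting a gap-free group of 7 stores
   with GROUP1_SIZE 4 leaves the first group with elements 0..3 and the
   second group with elements 4..6; DR_GROUP_GAP of the first group
   grows by GROUP2_SIZE (it must skip over the second group) and
   DR_GROUP_GAP of the second group becomes the first group's original
   gap plus GROUP1_SIZE.  */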
2104
2105 static stmt_vec_info
2106 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
2107 {
2108 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
2109 gcc_assert (group1_size > 0);
2110 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
2111 gcc_assert (group2_size > 0);
2112 DR_GROUP_SIZE (first_vinfo) = group1_size;
2113
2114 stmt_vec_info stmt_info = first_vinfo;
2115 for (unsigned i = group1_size; i > 1; i--)
2116 {
2117 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2118 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2119 }
2120 /* STMT_INFO is now the last element of the first group. */
2121 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2122 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
2123
2124 DR_GROUP_SIZE (group2) = group2_size;
2125 for (stmt_info = group2; stmt_info;
2126 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
2127 {
2128 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2129 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2130 }
2131
2132 /* For the second group, the DR_GROUP_GAP is that before the original group,
2133 plus skipping over the first vector. */
2134 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
2135
2136 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2137 DR_GROUP_GAP (first_vinfo) += group2_size;
2138
2139 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2141 group1_size, group2_size);
2142
2143 return group2;
2144 }
2145
2146 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2147 statements and a vector of NUNITS elements. */
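/* E.g. nunits 4 and group_size 6 give common_multiple 12 and thus an
   unrolling factor of 2: two iterations of the 6 stmts fill three
   4-element vectors.  When group_size is a multiple of nunits the
   factor is 1.  */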
2148
2149 static poly_uint64
2150 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2151 {
2152 return exact_div (common_multiple (nunits, group_size), group_size);
2153 }
2154
2155 /* Analyze an SLP instance starting from a group of grouped stores. Call
2156 vect_build_slp_tree to build a tree of packed stmts if possible.
2157 Return FALSE if it's impossible to SLP any stmt in the loop. */
2158
2159 static bool
2160 vect_analyze_slp_instance (vec_info *vinfo,
2161 scalar_stmts_to_slp_tree_map_t *bst_map,
2162 stmt_vec_info stmt_info, unsigned max_tree_size)
2163 {
2164 slp_instance new_instance;
2165 slp_tree node;
2166 unsigned int group_size;
2167 tree vectype, scalar_type = NULL_TREE;
2168 unsigned int i;
2169 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2170 vec<stmt_vec_info> scalar_stmts;
2171 bool constructor = false;
2172
2173 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2174 {
2175 scalar_type = TREE_TYPE (DR_REF (dr));
2176 group_size = DR_GROUP_SIZE (stmt_info);
2177 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2178 }
2179 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2180 {
2181 gcc_assert (is_a <loop_vec_info> (vinfo));
2182 vectype = STMT_VINFO_VECTYPE (stmt_info);
2183 group_size = REDUC_GROUP_SIZE (stmt_info);
2184 }
2185 else if (is_gimple_assign (stmt_info->stmt)
2186 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2187 {
2188 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2189 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2190 constructor = true;
2191 }
2192 else
2193 {
2194 gcc_assert (is_a <loop_vec_info> (vinfo));
2195 vectype = STMT_VINFO_VECTYPE (stmt_info);
2196 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2197 }
2198
2199 if (!vectype)
2200 {
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2203 "Build SLP failed: unsupported data-type %T\n",
2204 scalar_type);
2205
2206 return false;
2207 }
2208 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2209
2210 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2211 scalar_stmts.create (group_size);
2212 stmt_vec_info next_info = stmt_info;
2213 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2214 {
2215 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2216 while (next_info)
2217 {
2218 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2219 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2220 }
2221 }
2222 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2223 {
2224 /* Collect the reduction stmts and store them in
2225 SLP_TREE_SCALAR_STMTS. */
2226 while (next_info)
2227 {
2228 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2229 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2230 }
2231 /* Mark the first element of the reduction chain as reduction to properly
2232 transform the node. In the reduction analysis phase only the last
2233 element of the chain is marked as reduction. */
2234 STMT_VINFO_DEF_TYPE (stmt_info)
2235 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2236 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2237 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2238 }
2239 else if (constructor)
2240 {
2241 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2242 tree val;
2243 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2244 {
2245 if (TREE_CODE (val) == SSA_NAME)
2246 {
2247 gimple* def = SSA_NAME_DEF_STMT (val);
2248 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2249 /* Value is defined in another basic block. */
2250 if (!def_info)
2251 return false;
2252 scalar_stmts.safe_push (def_info);
2253 }
2254 else
2255 return false;
2256 }
2257 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "Analyzing vectorizable constructor: %G\n",
2260 stmt_info->stmt);
2261 }
2262 else
2263 {
2264 /* Collect reduction statements. */
2265 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2266 for (i = 0; reductions.iterate (i, &next_info); i++)
2267 scalar_stmts.safe_push (next_info);
2268 }
2269
2270 /* Build the tree for the SLP instance. */
2271 bool *matches = XALLOCAVEC (bool, group_size);
2272 unsigned npermutes = 0;
2273 poly_uint64 max_nunits = nunits;
2274 unsigned tree_size = 0;
2275 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2276 &max_nunits, matches, &npermutes,
2277 &tree_size, bst_map);
2278 if (node != NULL)
2279 {
2280 /* Calculate the unrolling factor based on the smallest type. */
2281 poly_uint64 unrolling_factor
2282 = calculate_unrolling_factor (max_nunits, group_size);
2283
2284 if (maybe_ne (unrolling_factor, 1U)
2285 && is_a <bb_vec_info> (vinfo))
2286 {
2287 unsigned HOST_WIDE_INT const_max_nunits;
2288 if (!max_nunits.is_constant (&const_max_nunits)
2289 || const_max_nunits > group_size)
2290 {
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "Build SLP failed: store group "
2294 "size not a multiple of the vector size "
2295 "in basic block SLP\n");
2296 vect_free_slp_tree (node, false);
2297 return false;
2298 }
2299 /* Fatal mismatch. */
2300 matches[group_size / const_max_nunits * const_max_nunits] = false;
2301 vect_free_slp_tree (node, false);
2302 }
2303 else
2304 {
2305 /* Create a new SLP instance. */
2306 new_instance = XNEW (class _slp_instance);
2307 SLP_INSTANCE_TREE (new_instance) = node;
2308 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2309 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2310 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2311 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2312
2313 vect_gather_slp_loads (new_instance, node);
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_NOTE, vect_location,
2316 "SLP size %u vs. limit %u.\n",
2317 tree_size, max_tree_size);
2318
2319 /* Check whether the loads are permuted, releasing permutations not needed. */
2320 slp_tree load_node;
2321 bool loads_permuted = false;
2322 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2323 {
2324 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2325 continue;
2326 unsigned j;
2327 stmt_vec_info load_info;
2328 bool this_load_permuted = false;
2329 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2330 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2331 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2332 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2333 {
2334 this_load_permuted = true;
2335 break;
2336 }
2337 if (!this_load_permuted
2338 /* The load requires permutation when unrolling exposes
2339 a gap either because the group is larger than the SLP
2340 group-size or because there is a gap between the groups. */
2341 && (known_eq (unrolling_factor, 1U)
2342 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2343 && DR_GROUP_GAP (first_stmt_info) == 0)))
2344 {
2345 SLP_TREE_LOAD_PERMUTATION (load_node).release ();
2346 continue;
2347 }
2348 loads_permuted = true;
2349 }
2350
2351 if (loads_permuted)
2352 {
2353 if (!vect_supported_load_permutation_p (new_instance))
2354 {
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 "Build SLP failed: unsupported load "
2358 "permutation %G", stmt_info->stmt);
2359 vect_free_slp_instance (new_instance, false);
2360 return false;
2361 }
2362 }
2363
2364 /* If the loads and stores can be handled with load/store-lanes
2365 instructions, do not generate this SLP instance. */
2366 if (is_a <loop_vec_info> (vinfo)
2367 && loads_permuted
2368 && dr && vect_store_lanes_supported (vectype, group_size, false))
2369 {
2370 slp_tree load_node;
2371 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2372 {
2373 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2374 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2375 /* Use SLP for strided accesses (or if we can't load-lanes). */
2376 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2377 || ! vect_load_lanes_supported
2378 (STMT_VINFO_VECTYPE (stmt_vinfo),
2379 DR_GROUP_SIZE (stmt_vinfo), false))
2380 break;
2381 }
2382 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2383 {
2384 if (dump_enabled_p ())
2385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2386 "Built SLP cancelled: can use "
2387 "load/store-lanes\n");
2388 vect_free_slp_instance (new_instance, false);
2389 return false;
2390 }
2391 }
2392
2393 /* If this is a reduction chain with a conversion in front,
2394 amend the SLP tree with a node for that. */
2395 if (!dr
2396 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2397 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2398 {
2399 /* Get at the conversion stmt - we know it's the single use
2400 of the last stmt of the reduction chain. */
2401 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2402 use_operand_p use_p;
2403 gimple *use_stmt;
2404 bool r = single_imm_use (gimple_assign_lhs (tem),
2405 &use_p, &use_stmt);
2406 gcc_assert (r);
2407 next_info = vinfo->lookup_stmt (use_stmt);
2408 next_info = vect_stmt_to_vectorize (next_info);
2409 scalar_stmts = vNULL;
2410 scalar_stmts.create (group_size);
2411 for (unsigned i = 0; i < group_size; ++i)
2412 scalar_stmts.quick_push (next_info);
2413 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2414 SLP_TREE_CHILDREN (conv).quick_push (node);
2415 SLP_INSTANCE_TREE (new_instance) = conv;
2416 /* We also have to fake this conversion stmt as an SLP reduction
2417 group so we don't have to mess with too much code
2418 elsewhere. */
2419 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2420 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2421 }
2422
2423 vinfo->slp_instances.safe_push (new_instance);
2424
2425 if (dump_enabled_p ())
2426 {
2427 dump_printf_loc (MSG_NOTE, vect_location,
2428 "Final SLP tree for instance:\n");
2429 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2430 }
2431
2432 return true;
2433 }
2434 }
2435 else
2436 {
2437 /* Failed to SLP. */
2438 /* Free the allocated memory. */
2439 scalar_stmts.release ();
2440 }
2441
2442 /* For basic block SLP, try to break the group up into multiples of the
2443 vector size. */
2444 unsigned HOST_WIDE_INT const_nunits;
2445 if (is_a <bb_vec_info> (vinfo)
2446 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2447 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2448 && nunits.is_constant (&const_nunits))
2449 {
2450 /* We consider breaking the group only on VF boundaries from the existing
2451 start. */
2452 for (i = 0; i < group_size; i++)
2453 if (!matches[i]) break;
2454
2455 if (i >= const_nunits && i < group_size)
2456 {
2457 /* Split into two groups at the first vector boundary before i. */
2458 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2459 unsigned group1_size = i & ~(const_nunits - 1);
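	  /* Illustrative: with const_nunits 4 and the first mismatch at
	     i == 6, group1_size becomes 4; the first four stmts are
	     re-analyzed as their own group, the vector holding the
	     mismatch (elements 4..7) is split off and skipped below,
	     and analysis is retried from element 8 if stmts remain.  */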
2460
2461 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2462 group1_size);
2463 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2464 max_tree_size);
2465 /* If the first non-match was in the middle of a vector,
2466 skip the rest of that vector. */
2467 if (group1_size < i)
2468 {
2469 i = group1_size + const_nunits;
2470 if (i < group_size)
2471 rest = vect_split_slp_store_group (rest, const_nunits);
2472 }
2473 if (i < group_size)
2474 res |= vect_analyze_slp_instance (vinfo, bst_map,
2475 rest, max_tree_size);
2476 return res;
2477 }
2478 /* Even though the first vector did not all match, we might be able to SLP
2479 (some) of the remainder. FORNOW ignore this possibility. */
2480 }
2481
2482 return false;
2483 }
2484
2485
2486 /* Check if there are stmts in the loop that can be vectorized using SLP.
2487 Build SLP trees of packed scalar stmts if SLP is possible. */
2488
2489 opt_result
2490 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2491 {
2492 unsigned int i;
2493 stmt_vec_info first_element;
2494
2495 DUMP_VECT_SCOPE ("vect_analyze_slp");
2496
2497 scalar_stmts_to_slp_tree_map_t *bst_map
2498 = new scalar_stmts_to_slp_tree_map_t ();
2499
2500 /* Find SLP sequences starting from groups of grouped stores. */
2501 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2502 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2503
2504 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2505 {
2506 if (loop_vinfo->reduction_chains.length () > 0)
2507 {
2508 /* Find SLP sequences starting from reduction chains. */
2509 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2510 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2511 max_tree_size))
2512 {
2513 /* Dissolve reduction chain group. */
2514 stmt_vec_info vinfo = first_element;
2515 stmt_vec_info last = NULL;
2516 while (vinfo)
2517 {
2518 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2519 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2520 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2521 last = vinfo;
2522 vinfo = next;
2523 }
2524 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2525 /* It can still be vectorized as part of an SLP reduction. */
2526 loop_vinfo->reductions.safe_push (last);
2527 }
2528 }
2529
2530 /* Find SLP sequences starting from groups of reductions. */
2531 if (loop_vinfo->reductions.length () > 1)
2532 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2533 max_tree_size);
2534 }
2535
2536 /* The map keeps a reference to the SLP nodes built; release that. */
2537 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2538 it != bst_map->end (); ++it)
2539 if ((*it).second)
2540 vect_free_slp_tree ((*it).second, false);
2541 delete bst_map;
2542
2543 return opt_result::success ();
2544 }
2545
2546
2547 /* For each possible SLP instance decide whether to SLP it and calculate overall
2548 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2549 least one instance. */
2550
2551 bool
2552 vect_make_slp_decision (loop_vec_info loop_vinfo)
2553 {
2554 unsigned int i;
2555 poly_uint64 unrolling_factor = 1;
2556 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2557 slp_instance instance;
2558 int decided_to_slp = 0;
2559
2560 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2561
2562 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2563 {
2564 /* FORNOW: SLP if you can. */
2565 /* All unroll factors have the form:
2566
2567 GET_MODE_SIZE (vinfo->vector_mode) * X
2568
2569 for some rational X, so they must have a common multiple. */
2570 unrolling_factor
2571 = force_common_multiple (unrolling_factor,
2572 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2573
2574 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2575 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2576 loop-based vectorization. Such stmts will be marked as HYBRID. */
2577 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2578 decided_to_slp++;
2579 }
2580
2581 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2582
2583 if (decided_to_slp && dump_enabled_p ())
2584 {
2585 dump_printf_loc (MSG_NOTE, vect_location,
2586 "Decided to SLP %d instances. Unrolling factor ",
2587 decided_to_slp);
2588 dump_dec (MSG_NOTE, unrolling_factor);
2589 dump_printf (MSG_NOTE, "\n");
2590 }
2591
2592 return (decided_to_slp > 0);
2593 }
2594
2595
2596 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2597 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2598
2599 static void
2600 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2601 hash_map<slp_tree, unsigned> &visited)
2602 {
2603 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2604 imm_use_iterator imm_iter;
2605 gimple *use_stmt;
2606 stmt_vec_info use_vinfo;
2607 slp_tree child;
2608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2609 int j;
2610
2611 /* We need to union stype over the incoming graph edges but we still
2612 want to limit recursion to stay O(N+E). */
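  /* Concretely, a node referenced by three parents has refcnt 3 and is
     reached via three edges; only on the last visit (visited_cnt equal
     to refcnt) do we inspect its uses and recurse into its children,
     so every edge is walked exactly once.  */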
2613 unsigned visited_cnt = ++visited.get_or_insert (node);
2614 gcc_assert (visited_cnt <= node->refcnt);
2615 bool only_edge = (visited_cnt != node->refcnt);
2616
2617 /* Propagate hybrid down the SLP tree. */
2618 if (stype == hybrid)
2619 ;
2620 else if (HYBRID_SLP_STMT (stmt_vinfo))
2621 stype = hybrid;
2622 else if (!only_edge)
2623 {
2624 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2625 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2626 /* If we get a pattern stmt here we have to use the LHS of the
2627 original stmt for immediate uses. */
2628 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2629 tree def;
2630 if (gimple_code (stmt) == GIMPLE_PHI)
2631 def = gimple_phi_result (stmt);
2632 else
2633 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2634 if (def)
2635 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2636 {
2637 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2638 if (!use_vinfo)
2639 continue;
2640 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2641 if (!STMT_SLP_TYPE (use_vinfo)
2642 && (STMT_VINFO_RELEVANT (use_vinfo)
2643 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2644 && !(gimple_code (use_stmt) == GIMPLE_PHI
2645 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2646 {
2647 if (dump_enabled_p ())
2648 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2649 "def in non-SLP stmt: %G", use_stmt);
2650 stype = hybrid;
2651 }
2652 }
2653 }
2654
2655 if (stype == hybrid
2656 && !HYBRID_SLP_STMT (stmt_vinfo))
2657 {
2658 if (dump_enabled_p ())
2659 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2660 stmt_vinfo->stmt);
2661 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2662 }
2663
2664 if (!only_edge)
2665 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2666 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2667 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2668 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2669 }
2670
2671 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2672
2673 static tree
2674 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2675 {
2676 walk_stmt_info *wi = (walk_stmt_info *)data;
2677 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2678
2679 if (wi->is_lhs)
2680 return NULL_TREE;
2681
2682 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2683 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2684 {
2685 if (dump_enabled_p ())
2686 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2687 def_stmt_info->stmt);
2688 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2689 }
2690
2691 return NULL_TREE;
2692 }
2693
2694 static tree
2695 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2696 walk_stmt_info *wi)
2697 {
2698 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2699 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2700 /* If the stmt is in a SLP instance then this isn't a reason
2701 to mark use definitions in other SLP instances as hybrid. */
2702 if (! STMT_SLP_TYPE (use_vinfo)
2703 && (STMT_VINFO_RELEVANT (use_vinfo)
2704 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2705 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2706 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2707 ;
2708 else
2709 *handled = true;
2710 return NULL_TREE;
2711 }
2712
2713 /* Find stmts that must be both vectorized and SLPed. */
2714
2715 void
2716 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2717 {
2718 unsigned int i;
2719 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2720 slp_instance instance;
2721
2722 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2723
2724 /* First walk all pattern stmts in the loop and mark the defs of their
2725 uses as hybrid because immediate uses in pattern stmts are not recorded. */
2726 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2727 {
2728 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2729 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2730 gsi_next (&gsi))
2731 {
2732 gimple *stmt = gsi_stmt (gsi);
2733 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2734 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2735 {
2736 walk_stmt_info wi;
2737 memset (&wi, 0, sizeof (wi));
2738 wi.info = loop_vinfo;
2739 gimple_stmt_iterator gsi2
2740 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2741 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2742 vect_detect_hybrid_slp_1, &wi);
2743 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2744 vect_detect_hybrid_slp_2,
2745 vect_detect_hybrid_slp_1, &wi);
2746 }
2747 }
2748 }
2749
2750 /* Then walk the SLP instance trees marking stmts with uses in
2751 non-SLP stmts as hybrid, also propagating hybrid down the
2752 SLP tree, collecting the above info on-the-fly. */
2753 for (unsigned j = 0;; ++j)
2754 {
2755 hash_map<slp_tree, unsigned> visited;
2756 bool any = false;
2757 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2758 if (j < SLP_INSTANCE_GROUP_SIZE (instance))
2759 {
2760 any = true;
2761 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2762 j, pure_slp, visited);
2763 }
2764 if (!any)
2765 break;
2766 }
2767 }
2768
2769
2770 /* Initialize a bb_vec_info struct for the statements between
2771 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2772
2773 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2774 gimple_stmt_iterator region_end_in,
2775 vec_info_shared *shared)
2776 : vec_info (vec_info::bb, init_cost (NULL), shared),
2777 bb (gsi_bb (region_begin_in)),
2778 region_begin (region_begin_in),
2779 region_end (region_end_in)
2780 {
2781 gimple_stmt_iterator gsi;
2782
2783 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2784 gsi_next (&gsi))
2785 {
2786 gimple *stmt = gsi_stmt (gsi);
2787 gimple_set_uid (stmt, 0);
2788 add_stmt (stmt);
2789 }
2790
2791 bb->aux = this;
2792 }
2793
2794
2795 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2796 stmts in the basic block. */
2797
2798 _bb_vec_info::~_bb_vec_info ()
2799 {
2800 for (gimple_stmt_iterator si = region_begin;
2801 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2802 /* Reset region marker. */
2803 gimple_set_uid (gsi_stmt (si), -1);
2804
2805 bb->aux = NULL;
2806 }
2807
2808 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2809 given that child nodes have already been processed, and that
2810 their def types currently match their SLP node's def type. */
2811
2812 static bool
2813 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2814 slp_instance node_instance,
2815 stmt_vector_for_cost *cost_vec)
2816 {
2817 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2818 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2819
2820 /* Calculate the number of vector statements to be created for the
2821 scalar stmts in this node. For SLP reductions it is equal to the
2822 number of vector statements in the children (which has already been
2823 calculated by the recursive call). Otherwise it is the number of
2824 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2825 VF divided by the number of elements in a vector. */
2826 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2827 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2828 {
2829 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2830 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2831 {
2832 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2833 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2834 break;
2835 }
2836 }
2837 else
2838 {
2839 poly_uint64 vf;
2840 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2841 vf = loop_vinfo->vectorization_factor;
2842 else
2843 vf = 1;
2844 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2845 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2846 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2847 = vect_get_num_vectors (vf * group_size, vectype);
2848 }
2849
2850 bool dummy;
2851 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2852 }
2853
2854 /* Try to build NODE from scalars, returning true on success.
2855 NODE_INSTANCE is the SLP instance that contains NODE. */
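/* For example, if a two-lane node of additions cannot be vectorized,
   the node is changed to vect_external_def and its SLP_TREE_SCALAR_OPS
   become the additions' LHS SSA names; later phases then treat those
   values like any other external operands.  */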
2856
2857 static bool
2858 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2859 slp_instance node_instance)
2860 {
2861 stmt_vec_info stmt_info;
2862 unsigned int i;
2863
2864 if (!is_a <bb_vec_info> (vinfo)
2865 || node == SLP_INSTANCE_TREE (node_instance)
2866 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2867 return false;
2868
2869 if (dump_enabled_p ())
2870 dump_printf_loc (MSG_NOTE, vect_location,
2871 "Building vector operands from scalars instead\n");
2872
2873 /* Don't remove and free the child nodes here, since they could be
2874 referenced by other structures. The analysis and scheduling phases
2875 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2876 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2877 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2878 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2879 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2880 {
2881 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2882 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2883 }
2884 return true;
2885 }
2886
2887 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2888 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2889
2890 Return true if the operations are supported. */
2891
2892 static bool
2893 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2894 slp_instance node_instance,
2895 hash_set<slp_tree> &visited,
2896 hash_set<slp_tree> &lvisited,
2897 stmt_vector_for_cost *cost_vec)
2898 {
2899 int i, j;
2900 slp_tree child;
2901
2902 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2903 return true;
2904
2905 /* If we already analyzed the exact same set of scalar stmts we're done.
2906 We share the generated vector stmts for those.
2907 The SLP graph is acyclic so not caching whether we failed or succeeded
2908 doesn't result in any issue since we throw away the lvisited set
2909 when we fail. */
2910 if (visited.contains (node)
2911 || lvisited.add (node))
2912 return true;
2913
2914 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2915 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2916 visited, lvisited, cost_vec))
2917 return false;
2918
2919 /* ??? We have to catch late the case where the first scalar stmt of two
2920 SLP children is the same but the children have different def types,
2921 and fail. Remember the original def types first since SLP_TREE_DEF_TYPE
2922 doesn't necessarily match them when it is vect_internal_def. */
2923 auto_vec<vect_def_type, 4> dt;
2924 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2925 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2926 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2927 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2928
2929 /* Push SLP node def-type to stmt operands. */
2930 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2931 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2932 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2933 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2934 = SLP_TREE_DEF_TYPE (child);
2935
2936 /* Check everything worked out. */
2937 bool res = true;
2938 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2939 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2940 {
2941 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2942 {
2943 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2944 != SLP_TREE_DEF_TYPE (child))
2945 res = false;
2946 }
2947 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2948 != dt[j])
2949 res = false;
2950 }
2951 if (!res && dump_enabled_p ())
2952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2953 "not vectorized: same operand with different "
2954 "def type in stmt.\n");
2955
2956 if (res)
2957 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2958 cost_vec);
2959
2960 /* Restore def-types. */
2961 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2962 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2963 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2964
2965 /* If this node can't be vectorized, try pruning the tree here rather
2966 than felling the whole thing. */
2967 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2968 res = true;
2969
2970 return res;
2971 }
2972
2973
2974 /* Analyze statements in SLP instances of VINFO. Return true if the
2975 operations are supported. */
2976
2977 bool
2978 vect_slp_analyze_operations (vec_info *vinfo)
2979 {
2980 slp_instance instance;
2981 int i;
2982
2983 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2984
2985 hash_set<slp_tree> visited;
2986 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2987 {
2988 hash_set<slp_tree> lvisited;
2989 stmt_vector_for_cost cost_vec;
2990 cost_vec.create (2);
2991 if (!vect_slp_analyze_node_operations (vinfo,
2992 SLP_INSTANCE_TREE (instance),
2993 instance, visited, lvisited,
2994 &cost_vec)
2995 /* Instances with a root stmt require vectorized defs for the
2996 SLP tree root. */
2997 || (SLP_INSTANCE_ROOT_STMT (instance)
2998 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2999 != vect_internal_def)))
3000 {
3001 slp_tree node = SLP_INSTANCE_TREE (instance);
3002 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3003 if (dump_enabled_p ())
3004 dump_printf_loc (MSG_NOTE, vect_location,
3005 "removing SLP instance operations starting from: %G",
3006 stmt_info->stmt);
3007 vect_free_slp_instance (instance, false);
3008 vinfo->slp_instances.ordered_remove (i);
3009 cost_vec.release ();
3010 }
3011 else
3012 {
3013 for (hash_set<slp_tree>::iterator x = lvisited.begin();
3014 x != lvisited.end(); ++x)
3015 visited.add (*x);
3016 i++;
3017
3018 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
3019 cost_vec.release ();
3020 }
3021 }
3022
3023 return !vinfo->slp_instances.is_empty ();
3024 }
3025
3026
3027 /* Compute the scalar cost of the SLP node NODE and its children
3028 and record it in COST_VEC. Do not account defs that are marked in
3029 LIFE, and update LIFE according to uses of NODE. */
3030
3031 static void
3032 vect_bb_slp_scalar_cost (basic_block bb,
3033 slp_tree node, vec<bool, va_heap> *life,
3034 stmt_vector_for_cost *cost_vec,
3035 hash_set<slp_tree> &visited)
3036 {
3037 unsigned i;
3038 stmt_vec_info stmt_info;
3039 slp_tree child;
3040
3041 if (visited.add (node))
3042 return;
3043
3044 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3045 {
3046 gimple *stmt = stmt_info->stmt;
3047 vec_info *vinfo = stmt_info->vinfo;
3048 ssa_op_iter op_iter;
3049 def_operand_p def_p;
3050
3051 if ((*life)[i])
3052 continue;
3053
3054 /* If there is a non-vectorized use of one of the defs then the
3055 scalar stmt is kept live, in which case we do not account it or
3056 any required defs in the SLP children in the scalar cost. This
3057 way vectorization is made more costly when compared to the
3058 scalar cost. */
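      /* E.g. if the scalar add feeding a vectorized store is also used
	 by a stmt outside the SLP instance, the add has to stay around
	 anyway, so its cost is not counted as saved by vectorization.  */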
3059 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
3060 {
3061 imm_use_iterator use_iter;
3062 gimple *use_stmt;
3063 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
3064 if (!is_gimple_debug (use_stmt))
3065 {
3066 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
3067 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
3068 {
3069 (*life)[i] = true;
3070 BREAK_FROM_IMM_USE_STMT (use_iter);
3071 }
3072 }
3073 }
3074 if ((*life)[i])
3075 continue;
3076
3077 /* Count scalar stmts only once. */
3078 if (gimple_visited_p (stmt))
3079 continue;
3080 gimple_set_visited (stmt, true);
3081
3082 vect_cost_for_stmt kind;
3083 if (STMT_VINFO_DATA_REF (stmt_info))
3084 {
3085 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
3086 kind = scalar_load;
3087 else
3088 kind = scalar_store;
3089 }
3090 else if (vect_nop_conversion_p (stmt_info))
3091 continue;
3092 else
3093 kind = scalar_stmt;
3094 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
3095 }
3096
3097 auto_vec<bool, 20> subtree_life;
3098 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3099 {
3100 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3101 {
3102 /* Do not directly pass LIFE to the recursive call, copy it to
3103 confine changes in the callee to the current child/subtree. */
3104 subtree_life.safe_splice (*life);
3105 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3106 visited);
3107 subtree_life.truncate (0);
3108 }
3109 }
3110 }
3111
3112 /* Check if vectorization of the basic block is profitable. */
3113
3114 static bool
3115 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3116 {
3117 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3118 slp_instance instance;
3119 int i;
3120 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
3121 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
3122
3123 /* Calculate scalar cost. */
3124 stmt_vector_for_cost scalar_costs;
3125 scalar_costs.create (0);
3126 hash_set<slp_tree> visited;
3127 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3128 {
3129 auto_vec<bool, 20> life;
3130 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
3131 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3132 SLP_INSTANCE_TREE (instance),
3133 &life, &scalar_costs, visited);
3134 }
3135 void *target_cost_data = init_cost (NULL);
3136 add_stmt_costs (target_cost_data, &scalar_costs);
3137 scalar_costs.release ();
3138 unsigned dummy;
3139 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3140 destroy_cost_data (target_cost_data);
3141
3142 /* Unset visited flag. */
3143 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3144 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3145 gimple_set_visited (gsi_stmt (gsi), false);
3146
3147 /* Complete the target-specific cost calculation. */
3148 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3149 &vec_inside_cost, &vec_epilogue_cost);
3150
3151 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
3152
3153 if (dump_enabled_p ())
3154 {
3155 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3156 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
3157 vec_inside_cost);
3158 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
3159 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
3160 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
3161 }
3162
3163 /* Vectorization is profitable if its cost is less than or equal to the
3164 cost of the scalar version. We err on the vector side for equal cost
3165 because the cost estimate is otherwise quite pessimistic (constant
3166 uses are free on the scalar side but cost a load on the vector side,
3167 for example). */
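  /* E.g. with a scalar cost of 12, a vector inside cost of 8 and a
     vector outside (prologue + epilogue) cost of 2 we vectorize since
     10 <= 12; with 12 vs. 12 we also vectorize, per the note above.  */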
3168 if (vec_outside_cost + vec_inside_cost > scalar_cost)
3169 return false;
3170
3171 return true;
3172 }
3173
3174 /* Find any vectorizable constructors and add them to the grouped_stores
3175 array. */
3176
3177 static void
3178 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3179 {
3180 gimple_stmt_iterator gsi;
3181
3182 for (gsi = bb_vinfo->region_begin;
3183 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3184 {
3185 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3186 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3187 continue;
3188
3189 tree rhs = gimple_assign_rhs1 (stmt);
3190 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3191 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3192 CONSTRUCTOR_NELTS (rhs))
3193 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3194 || uniform_vector_p (rhs))
3195 continue;
3196
3197 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3198 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3199 }
3200 }
3201
3202 /* Check if the region described by BB_VINFO can be vectorized, returning
3203 true if so. When returning false, set FATAL to true if the same failure
3204 would prevent vectorization at other vector sizes, false if it is still
3205 worth trying other sizes. N_STMTS is the number of statements in the
3206 region. */
3207
3208 static bool
3209 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3210 {
3211 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3212
3213 slp_instance instance;
3214 int i;
3215 poly_uint64 min_vf = 2;
3216
3217 /* The first group of checks is independent of the vector size. */
3218 fatal = true;
3219
3220 /* Analyze the data references. */
3221
3222 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3223 {
3224 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3226 "not vectorized: unhandled data-ref in basic "
3227 "block.\n");
3228 return false;
3229 }
3230
3231 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3232 {
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3235 "not vectorized: not enough data-refs in "
3236 "basic block.\n");
3237 return false;
3238 }
3239
3240 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3241 {
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3244 "not vectorized: unhandled data access in "
3245 "basic block.\n");
3246 return false;
3247 }
3248
3249 vect_slp_check_for_constructors (bb_vinfo);
3250
3251 /* If there are no grouped stores in the region there is no need
3252 to continue with pattern recog as vect_analyze_slp will fail
3253 anyway. */
3254 if (bb_vinfo->grouped_stores.is_empty ())
3255 {
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3258 "not vectorized: no grouped stores in "
3259 "basic block.\n");
3260 return false;
3261 }
3262
3263 /* The rest of the analysis below depends on the vector size in some way. */
3264 fatal = false;
3265
3266 vect_pattern_recog (bb_vinfo);
3267
3268 /* Check the SLP opportunities in the basic block, analyze and build SLP
3269 trees. */
3270 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3271 {
3272 if (dump_enabled_p ())
3273 {
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3275 "Failed to SLP the basic block.\n");
3276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3277 "not vectorized: failed to find SLP opportunities "
3278 "in basic block.\n");
3279 }
3280 return false;
3281 }
3282
3283 vect_record_base_alignments (bb_vinfo);
3284
3285 /* Analyze and verify the alignment of data references and the
3286 dependence in the SLP instances. */
3287 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3288 {
3289 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3290 || ! vect_slp_analyze_instance_dependence (instance))
3291 {
3292 slp_tree node = SLP_INSTANCE_TREE (instance);
3293 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3294 if (dump_enabled_p ())
3295 dump_printf_loc (MSG_NOTE, vect_location,
3296 "removing SLP instance operations starting from: %G",
3297 stmt_info->stmt);
3298 vect_free_slp_instance (instance, false);
3299 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3300 continue;
3301 }
3302
3303 /* Mark all the statements that we want to vectorize as pure SLP and
3304 relevant. */
3305 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3306 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3307 if (SLP_INSTANCE_ROOT_STMT (instance))
3308 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3309
3310 i++;
3311 }
3312 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3313 return false;
3314
3315 if (!vect_slp_analyze_operations (bb_vinfo))
3316 {
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3319 "not vectorized: bad operation in basic block.\n");
3320 return false;
3321 }
3322
3323 /* Cost model: check if the vectorization is worthwhile. */
3324 if (!unlimited_cost_model (NULL)
3325 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3326 {
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3329 "not vectorized: vectorization is not "
3330 "profitable.\n");
3331 return false;
3332 }
3333
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_NOTE, vect_location,
3336 "Basic block will be vectorized using SLP\n");
3337 return true;
3338 }
3339
3340 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3341 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3342 on success. The region has N_STMTS statements and has the datarefs
3343 given by DATAREFS. */
3344
3345 static bool
3346 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3347 gimple_stmt_iterator region_end,
3348 vec<data_reference_p> datarefs,
3349 unsigned int n_stmts)
3350 {
3351 bb_vec_info bb_vinfo;
3352 auto_vector_modes vector_modes;
3353
3354 /* Autodetect first vector size we try. */
3355 machine_mode next_vector_mode = VOIDmode;
3356 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3357 unsigned int mode_i = 0;
3358
3359 vec_info_shared shared;
3360
3361 machine_mode autodetected_vector_mode = VOIDmode;
3362 while (1)
3363 {
3364 bool vectorized = false;
3365 bool fatal = false;
3366 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3367
3368 bool first_time_p = shared.datarefs.is_empty ();
3369 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3370 if (first_time_p)
3371 bb_vinfo->shared->save_datarefs ();
3372 else
3373 bb_vinfo->shared->check_datarefs ();
3374 bb_vinfo->vector_mode = next_vector_mode;
3375
3376 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3377 && dbg_cnt (vect_slp))
3378 {
3379 if (dump_enabled_p ())
3380 {
3381 dump_printf_loc (MSG_NOTE, vect_location,
3382 "***** Analysis succeeded with vector mode"
3383 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3384 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3385 }
3386
3387 bb_vinfo->shared->check_datarefs ();
3388 vect_schedule_slp (bb_vinfo);
3389
3390 unsigned HOST_WIDE_INT bytes;
3391 if (dump_enabled_p ())
3392 {
3393 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3394 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3395 "basic block part vectorized using %wu byte "
3396 "vectors\n", bytes);
3397 else
3398 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3399 "basic block part vectorized using variable "
3400 "length vectors\n");
3401 }
3402
3403 vectorized = true;
3404 }
3405 else
3406 {
3407 if (dump_enabled_p ())
3408 dump_printf_loc (MSG_NOTE, vect_location,
3409 "***** Analysis failed with vector mode %s\n",
3410 GET_MODE_NAME (bb_vinfo->vector_mode));
3411 }
3412
3413 if (mode_i == 0)
3414 autodetected_vector_mode = bb_vinfo->vector_mode;
3415
3416 if (!fatal)
3417 while (mode_i < vector_modes.length ()
3418 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3419 {
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_NOTE, vect_location,
3422 "***** The result for vector mode %s would"
3423 " be the same\n",
3424 GET_MODE_NAME (vector_modes[mode_i]));
3425 mode_i += 1;
3426 }
3427
3428 delete bb_vinfo;
3429
3430 if (mode_i < vector_modes.length ()
3431 && VECTOR_MODE_P (autodetected_vector_mode)
3432 && (related_vector_mode (vector_modes[mode_i],
3433 GET_MODE_INNER (autodetected_vector_mode))
3434 == autodetected_vector_mode)
3435 && (related_vector_mode (autodetected_vector_mode,
3436 GET_MODE_INNER (vector_modes[mode_i]))
3437 == vector_modes[mode_i]))
3438 {
3439 if (dump_enabled_p ())
3440 dump_printf_loc (MSG_NOTE, vect_location,
3441 "***** Skipping vector mode %s, which would"
3442 " repeat the analysis for %s\n",
3443 GET_MODE_NAME (vector_modes[mode_i]),
3444 GET_MODE_NAME (autodetected_vector_mode));
3445 mode_i += 1;
3446 }
3447
3448 if (vectorized
3449 || mode_i == vector_modes.length ()
3450 || autodetected_vector_mode == VOIDmode
3451 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3452 vector sizes will fail do not bother iterating. */
3453 || fatal)
3454 return vectorized;
3455
3456 /* Try the next vector mode in the list. */
3457 next_vector_mode = vector_modes[mode_i++];
3458 if (dump_enabled_p ())
3459 dump_printf_loc (MSG_NOTE, vect_location,
3460 "***** Re-trying analysis with vector mode %s\n",
3461 GET_MODE_NAME (next_vector_mode));
3462 }
3463 }
3464
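/* Editor's illustrative sketch (not part of this file): the retry loop
   above follows a generic "try candidate modes until one succeeds"
   driver.  A minimal stand-alone version of that pattern, with
   hypothetical demo_* stand-ins for the vectorizer types and hooks,
   might look like this.  */

typedef int demo_mode;			/* stand-in for machine_mode; -1 ~ VOIDmode */

static bool
demo_try_vector_modes (const demo_mode *candidates, unsigned n_candidates,
		       bool (*analyze) (demo_mode),
		       bool (*would_repeat_p) (demo_mode))
{
  demo_mode next = -1;			/* first round autodetects the mode */
  unsigned i = 0;
  while (1)
    {
      bool ok = analyze (next);		/* analyze the region with NEXT */
      /* Skip candidates whose analysis would repeat an earlier result.  */
      while (i < n_candidates && would_repeat_p (candidates[i]))
	i++;
      if (ok || i == n_candidates)
	return ok;
      next = candidates[i++];		/* re-try with the next candidate */
    }
}
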
3465 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3466 true if anything in the basic-block was vectorized. */
3467
3468 bool
3469 vect_slp_bb (basic_block bb)
3470 {
3471 gimple_stmt_iterator gsi;
3472 bool any_vectorized = false;
3473
3474 gsi = gsi_start_bb (bb);
3475 while (!gsi_end_p (gsi))
3476 {
3477 gimple_stmt_iterator region_begin = gsi;
3478 vec<data_reference_p> datarefs = vNULL;
3479 int insns = 0;
3480
3481 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3482 {
3483 gimple *stmt = gsi_stmt (gsi);
3484 if (is_gimple_debug (stmt))
3485 continue;
3486 insns++;
3487
3488 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3489 vect_location = stmt;
3490
3491 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3492 break;
3493 }
3494
3495 /* Skip leading unhandled stmts. */
3496 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3497 {
3498 gsi_next (&gsi);
3499 continue;
3500 }
3501
3502 gimple_stmt_iterator region_end = gsi;
3503
3504 if (insns > param_slp_max_insns_in_bb)
3505 {
3506 if (dump_enabled_p ())
3507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3508 "not vectorized: too many instructions in "
3509 "basic block.\n");
3510 }
3511 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3512 any_vectorized = true;
3513
3514 if (gsi_end_p (region_end))
3515 break;
3516
3517 /* Skip the unhandled stmt. */
3518 gsi_next (&gsi);
3519 }
3520
3521 return any_vectorized;
3522 }
3523
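/* Editor's illustrative sketch (not part of this file): vect_slp_bb
   above splits a basic block into maximal runs of statements with
   analyzable data references; a statement that fails
   vect_find_stmt_data_reference terminates the current region and is
   then skipped.  The same control flow on a plain array, with a
   hypothetical handled_p predicate:  */

static unsigned
demo_split_regions (const int *stmts, unsigned n,
		    bool (*handled_p) (int),
		    void (*process_region) (unsigned begin, unsigned end))
{
  unsigned nregions = 0;
  unsigned i = 0;
  while (i < n)
    {
      unsigned begin = i;
      while (i < n && handled_p (stmts[i]))	/* grow the region */
	i++;
      if (i > begin)				/* non-empty region found */
	{
	  process_region (begin, i);		/* analyze [BEGIN, I) */
	  nregions++;
	}
      if (i < n)
	i++;					/* skip the unhandled stmt */
    }
  return nregions;
}
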
3524
3525 /* Return true if constant operand OP_NUM of STMT_VINFO requires a boolean vector type. */
3526
3527 static bool
3528 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
3529 {
3530 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3531 tree op, vectype;
3532 enum vect_def_type dt;
3533
3534 /* For comparisons and COND_EXPR the type is chosen depending
3535 on the other, non-constant comparison operand. */
3536 if (TREE_CODE_CLASS (code) == tcc_comparison)
3537 {
3538 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3539 op = gimple_assign_rhs1 (stmt);
3540
3541 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3542 gcc_unreachable ();
3543
3544 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3545 }
3546
3547 if (code == COND_EXPR)
3548 {
3549 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3550 tree cond = gimple_assign_rhs1 (stmt);
3551
3552 if (TREE_CODE (cond) == SSA_NAME)
3553 {
3554 if (op_num > 0)
3555 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3556 op = cond;
3557 }
3558 else
3559 {
3560 if (op_num > 1)
3561 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3562 op = TREE_OPERAND (cond, 0);
3563 }
3564
3565 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3566 gcc_unreachable ();
3567
3568 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3569 }
3570
3571 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3572 }
3573
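/* Editor's illustrative sketch (not part of this file): the decision
   implemented by vect_mask_constant_operand_p above, boiled down to a
   table over plain booleans.  All parameters are hypothetical stand-ins
   for the statement properties the real code queries.  */

static bool
demo_mask_operand_p (bool comparison_p, bool cond_expr_p, bool cond_is_ssa_p,
		     unsigned op_num, bool other_op_mask_p,
		     bool stmt_vectype_mask_p)
{
  if (comparison_p)
    /* Comparisons take their type from the other comparison operand.  */
    return other_op_mask_p;
  if (cond_expr_p)
    {
      /* Only the condition (operand 0, or the operands of an embedded
	 comparison) is a mask; the value operands follow the stmt.  */
      if (cond_is_ssa_p ? op_num > 0 : op_num > 1)
	return stmt_vectype_mask_p;
      return other_op_mask_p;
    }
  return stmt_vectype_mask_p;
}
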
3574 /* Build a variable-length vector in which the elements in ELTS are repeated
3575 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3576 RESULTS and add any new instructions to SEQ.
3577
3578 The approach we use is:
3579
3580 (1) Find a vector mode VM with integer elements of mode IM.
3581
3582 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3583 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3584 from small vectors to IM.
3585
3586 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3587
3588 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3589 correct byte contents.
3590
3591 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3592
3593 We try to find the largest IM for which this sequence works, in order
3594 to cut down on the number of interleaves. */
3595
3596 void
3597 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3598 vec<tree> elts, unsigned int nresults,
3599 vec<tree> &results)
3600 {
3601 unsigned int nelts = elts.length ();
3602 tree element_type = TREE_TYPE (vector_type);
3603
3604 /* (1) Find a vector mode VM with integer elements of mode IM. */
3605 unsigned int nvectors = 1;
3606 tree new_vector_type;
3607 tree permutes[2];
3608 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3609 &nvectors, &new_vector_type,
3610 permutes))
3611 gcc_unreachable ();
3612
3613 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3614 unsigned int partial_nelts = nelts / nvectors;
3615 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3616
3617 tree_vector_builder partial_elts;
3618 auto_vec<tree, 32> pieces (nvectors * 2);
3619 pieces.quick_grow (nvectors * 2);
3620 for (unsigned int i = 0; i < nvectors; ++i)
3621 {
3622 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3623 ELTS' has mode IM. */
3624 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3625 for (unsigned int j = 0; j < partial_nelts; ++j)
3626 partial_elts.quick_push (elts[i * partial_nelts + j]);
3627 tree t = gimple_build_vector (seq, &partial_elts);
3628 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3629 TREE_TYPE (new_vector_type), t);
3630
3631 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3632 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3633 }
3634
3635 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3636 correct byte contents.
3637
3638 We need to repeat the following operation log2(nvectors) times:
3639
3640 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3641 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3642
3643 However, if each input repeats every N elements and the VF is
3644 a multiple of N * 2, the HI result is the same as the LO. */
3645 unsigned int in_start = 0;
3646 unsigned int out_start = nvectors;
3647 unsigned int hi_start = nvectors / 2;
3648 /* A bound on the number of outputs needed to produce NRESULTS results
3649 in the final iteration. */
3650 unsigned int noutputs_bound = nvectors * nresults;
3651 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3652 {
3653 noutputs_bound /= 2;
3654 unsigned int limit = MIN (noutputs_bound, nvectors);
3655 for (unsigned int i = 0; i < limit; ++i)
3656 {
3657 if ((i & 1) != 0
3658 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3659 2 * in_repeat))
3660 {
3661 pieces[out_start + i] = pieces[out_start + i - 1];
3662 continue;
3663 }
3664
3665 tree output = make_ssa_name (new_vector_type);
3666 tree input1 = pieces[in_start + (i / 2)];
3667 tree input2 = pieces[in_start + (i / 2) + hi_start];
3668 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3669 input1, input2,
3670 permutes[i & 1]);
3671 gimple_seq_add_stmt (seq, stmt);
3672 pieces[out_start + i] = output;
3673 }
3674 std::swap (in_start, out_start);
3675 }
3676
3677 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3678 results.reserve (nresults);
3679 for (unsigned int i = 0; i < nresults; ++i)
3680 if (i < nvectors)
3681 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3682 pieces[in_start + i]));
3683 else
3684 results.quick_push (results[i - nvectors]);
3685 }
3686
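/* Editor's illustrative sketch (not part of this file): one round of
   the step-(4) interleave above, written out on plain arrays.  The
   "lo" permute interleaves the low halves of its two inputs and the
   "hi" permute the high halves.  When both inputs are splats (as after
   step (3)), e.g. {A,A,A,A} and {B,B,B,B}, both results are {A,B,A,B},
   which is why the loop above can reuse the previous output once the
   repeat period divides the vector length.  */

static void
demo_interleave_lo_hi (const int *in1, const int *in2, unsigned nelts,
		       int *lo, int *hi)
{
  for (unsigned i = 0; i < nelts / 2; ++i)
    {
      lo[2 * i] = in1[i];			/* low halves, interleaved */
      lo[2 * i + 1] = in2[i];
      hi[2 * i] = in1[nelts / 2 + i];		/* high halves, interleaved */
      hi[2 * i + 1] = in2[nelts / 2 + i];
    }
}
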
3687
3688 /* For constant and loop invariant defs of SLP_NODE this function returns
3689 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3690 OP_NUM determines the child of SLP_NODE containing the scalar
3691 operands. */
3692
3693 static void
3694 vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
3695 vec<tree> *vec_oprnds)
3696 {
3697 slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3698 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3699 vec_info *vinfo = stmt_vinfo->vinfo;
3700 unsigned HOST_WIDE_INT nunits;
3701 tree vec_cst;
3702 unsigned j, number_of_places_left_in_vector;
3703 tree vector_type;
3704 tree vop;
3705 int group_size = op_node->ops.length ();
3706 unsigned int vec_num, i;
3707 unsigned number_of_copies = 1;
3708 bool constant_p;
3709 tree neutral_op = NULL;
3710 gimple_seq ctor_seq = NULL;
3711 auto_vec<tree, 16> permute_results;
3712
3713 /* ??? SLP analysis should compute the vector type for the
3714 constant / invariant and store it in the SLP node. */
3715 tree op = op_node->ops[0];
3716 /* Check if vector type is a boolean vector. */
3717 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3718 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3719 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3720 vector_type = truth_type_for (stmt_vectype);
3721 else
3722 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3723
3724 /* ??? For lane-reducing ops we should also have the required number
3725 of vector stmts initialized rather than second-guessing here. */
3726 unsigned int number_of_vectors;
3727 if (is_gimple_assign (stmt_vinfo->stmt)
3728 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3729 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3730 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3731 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3732 else
3733 number_of_vectors
3734 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3735 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3736 vector_type);
3737 vec_oprnds->create (number_of_vectors);
3738 auto_vec<tree> voprnds (number_of_vectors);
3739
3740 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3741 the created vectors. It is greater than 1 if unrolling is performed.
3742
3743 For example, we have two scalar operands, s1 and s2 (e.g., group of
3744 strided accesses of size two), while NUNITS is four (i.e., four scalars
3745 of this type can be packed in a vector). The output vector will contain
3746 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3747 will be 2).
3748
3749 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3750 containing the operands.
3751
3752 For example, NUNITS is four as before, and the group size is 8
3753 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3754 {s5, s6, s7, s8}. */
3755
3756 /* When using duplicate_and_interleave, we just need one element for
3757 each scalar statement. */
3758 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3759 nunits = group_size;
3760
3761 number_of_copies = nunits * number_of_vectors / group_size;
3762
3763 number_of_places_left_in_vector = nunits;
3764 constant_p = true;
3765 tree_vector_builder elts (vector_type, nunits, 1);
3766 elts.quick_grow (nunits);
3767 bool place_after_defs = false;
3768 for (j = 0; j < number_of_copies; j++)
3769 {
3770 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3771 {
3772 /* Create 'vect_ = {op0,op1,...,opn}'. */
3773 number_of_places_left_in_vector--;
3774 tree orig_op = op;
3775 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3776 {
3777 if (CONSTANT_CLASS_P (op))
3778 {
3779 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3780 {
3781 /* Can't use VIEW_CONVERT_EXPR for booleans because
3782 of possibly different sizes of scalar value and
3783 vector element. */
3784 if (integer_zerop (op))
3785 op = build_int_cst (TREE_TYPE (vector_type), 0);
3786 else if (integer_onep (op))
3787 op = build_all_ones_cst (TREE_TYPE (vector_type));
3788 else
3789 gcc_unreachable ();
3790 }
3791 else
3792 op = fold_unary (VIEW_CONVERT_EXPR,
3793 TREE_TYPE (vector_type), op);
3794 gcc_assert (op && CONSTANT_CLASS_P (op));
3795 }
3796 else
3797 {
3798 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3799 gimple *init_stmt;
3800 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3801 {
3802 tree true_val
3803 = build_all_ones_cst (TREE_TYPE (vector_type));
3804 tree false_val
3805 = build_zero_cst (TREE_TYPE (vector_type));
3806 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3807 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3808 op, true_val,
3809 false_val);
3810 }
3811 else
3812 {
3813 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3814 op);
3815 init_stmt
3816 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3817 op);
3818 }
3819 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3820 op = new_temp;
3821 }
3822 }
3823 elts[number_of_places_left_in_vector] = op;
3824 if (!CONSTANT_CLASS_P (op))
3825 constant_p = false;
3826 if (TREE_CODE (orig_op) == SSA_NAME
3827 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3828 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3829 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3830 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3831 place_after_defs = true;
3832
3833 if (number_of_places_left_in_vector == 0)
3834 {
3835 if (constant_p
3836 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3837 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3838 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3839 else
3840 {
3841 if (permute_results.is_empty ())
3842 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3843 elts, number_of_vectors,
3844 permute_results);
3845 vec_cst = permute_results[number_of_vectors - j - 1];
3846 }
3847 tree init;
3848 gimple_stmt_iterator gsi;
3849 if (place_after_defs)
3850 {
3851 stmt_vec_info last_stmt_info
3852 = vect_find_last_scalar_stmt_in_slp (slp_node);
3853 gsi = gsi_for_stmt (last_stmt_info->stmt);
3854 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3855 &gsi);
3856 }
3857 else
3858 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3859 NULL);
3860 if (ctor_seq != NULL)
3861 {
3862 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3863 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3864 ctor_seq = NULL;
3865 }
3866 voprnds.quick_push (init);
3867 place_after_defs = false;
3868 number_of_places_left_in_vector = nunits;
3869 constant_p = true;
3870 elts.new_vector (vector_type, nunits, 1);
3871 elts.quick_grow (nunits);
3872 }
3873 }
3874 }
3875
3876 /* Since the vectors are created in reverse order, we have to invert
3877 them. */
3878 vec_num = voprnds.length ();
3879 for (j = vec_num; j != 0; j--)
3880 {
3881 vop = voprnds[j - 1];
3882 vec_oprnds->quick_push (vop);
3883 }
3884
3885 /* If the VF is greater than the unrolling factor needed for the SLP
3886 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3887 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3888 to replicate the vectors. */
3889 while (number_of_vectors > vec_oprnds->length ())
3890 {
3891 tree neutral_vec = NULL;
3892
3893 if (neutral_op)
3894 {
3895 if (!neutral_vec)
3896 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3897
3898 vec_oprnds->quick_push (neutral_vec);
3899 }
3900 else
3901 {
3902 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3903 vec_oprnds->quick_push (vop);
3904 }
3905 }
3906 }
3907
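/* Editor's illustrative sketch (not part of this file): the element
   layout performed by vect_get_constant_vectors above, for one output
   vector and constant operands only.  With GROUP_SIZE == 2 operands
   {s1, s2} and NUNITS == 4, NUMBER_OF_COPIES is 2 and the vector is
   filled back to front, yielding {s1, s2, s1, s2} as in the comment
   above.  */

static void
demo_fill_constant_vector (const int *ops, unsigned group_size,
			   unsigned nunits, int *vec)
{
  unsigned number_of_copies = nunits / group_size;
  unsigned places_left = nunits;
  for (unsigned j = 0; j < number_of_copies; j++)
    /* Like the real code, walk the group in reverse.  */
    for (unsigned i = group_size; i-- > 0;)
      vec[--places_left] = ops[i];
}
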
3908
3909 /* Get the vectorized definitions from SLP_NODE, which contains the
3910 corresponding vectorized def-stmts. */
3911
3912 static void
3913 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3914 {
3915 stmt_vec_info vec_def_stmt_info;
3916 unsigned int i;
3917
3918 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3919
3920 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3921 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3922 }
3923
3924
3925 /* Get N vectorized definitions for SLP_NODE.
3926 If the scalar definitions are loop invariants or constants, collect them and
3927 call vect_get_constant_vectors() to create vector stmts.
3928 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3929 must be stored in the corresponding child of SLP_NODE, and we call
3930 vect_get_slp_vect_defs () to retrieve them. */
3931
3932 void
3933 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3934 {
3935 if (n == -1U)
3936 n = SLP_TREE_CHILDREN (slp_node).length ();
3937
3938 for (unsigned i = 0; i < n; ++i)
3939 {
3940 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3941
3942 vec<tree> vec_defs = vNULL;
3943
3944 /* For each operand we check if it has vectorized definitions in a child
3945 node or we need to create them (for invariants and constants). */
3946 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3947 {
3948 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3949 vect_get_slp_vect_defs (child, &vec_defs);
3950 }
3951 else
3952 vect_get_constant_vectors (slp_node, i, &vec_defs);
3953
3954 vec_oprnds->quick_push (vec_defs);
3955 }
3956 }
3957
3958 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3959 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3960 permute statements for the SLP node NODE of the SLP instance
3961 SLP_NODE_INSTANCE. */
3962
3963 bool
3964 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3965 gimple_stmt_iterator *gsi, poly_uint64 vf,
3966 slp_instance slp_node_instance, bool analyze_only,
3967 unsigned *n_perms)
3968 {
3969 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3970 vec_info *vinfo = stmt_info->vinfo;
3971 int vec_index = 0;
3972 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3973 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3974 unsigned int mask_element;
3975 machine_mode mode;
3976
3977 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3978 return false;
3979
3980 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3981
3982 mode = TYPE_MODE (vectype);
3983 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3984
3985 /* Initialize the vect stmts of NODE to properly insert the generated
3986 stmts later. */
3987 if (! analyze_only)
3988 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3989 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3990 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3991
3992 /* Generate permutation masks for every NODE. Number of masks for each NODE
3993 is equal to GROUP_SIZE.
3994 E.g., we have a group of three nodes with three loads from the same
3995 location in each node, and the vector size is 4. I.e., we have an
3996 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3997 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3998 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3999 ...
4000
4001 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
4002 The last mask is illegal since we assume two operands for permute
4003 operation, and the mask element values can't be outside that range.
4004 Hence, the last mask must be converted into {2,5,5,5}.
4005 For the first two permutations we need the first and the second input
4006 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
4007 we need the second and the third vectors: {b1,c1,a2,b2} and
4008 {c2,a3,b3,c3}. */
4009
4010 int vect_stmts_counter = 0;
4011 unsigned int index = 0;
4012 int first_vec_index = -1;
4013 int second_vec_index = -1;
4014 bool noop_p = true;
4015 *n_perms = 0;
4016
4017 vec_perm_builder mask;
4018 unsigned int nelts_to_build;
4019 unsigned int nvectors_per_build;
4020 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
4021 && multiple_p (nunits, group_size));
4022 if (repeating_p)
4023 {
4024 /* A single vector contains a whole number of copies of the node, so:
4025 (a) all permutes can use the same mask; and
4026 (b) the permutes only need a single vector input. */
4027 mask.new_vector (nunits, group_size, 3);
4028 nelts_to_build = mask.encoded_nelts ();
4029 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
4030 }
4031 else
4032 {
4033 /* We need to construct a separate mask for each vector statement. */
4034 unsigned HOST_WIDE_INT const_nunits, const_vf;
4035 if (!nunits.is_constant (&const_nunits)
4036 || !vf.is_constant (&const_vf))
4037 return false;
4038 mask.new_vector (const_nunits, const_nunits, 1);
4039 nelts_to_build = const_vf * group_size;
4040 nvectors_per_build = 1;
4041 }
4042
4043 unsigned int count = mask.encoded_nelts ();
4044 mask.quick_grow (count);
4045 vec_perm_indices indices;
4046
4047 for (unsigned int j = 0; j < nelts_to_build; j++)
4048 {
4049 unsigned int iter_num = j / group_size;
4050 unsigned int stmt_num = j % group_size;
4051 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
4052 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
4053 if (repeating_p)
4054 {
4055 first_vec_index = 0;
4056 mask_element = i;
4057 }
4058 else
4059 {
4060 /* Enforced before the loop when !repeating_p. */
4061 unsigned int const_nunits = nunits.to_constant ();
4062 vec_index = i / const_nunits;
4063 mask_element = i % const_nunits;
4064 if (vec_index == first_vec_index
4065 || first_vec_index == -1)
4066 {
4067 first_vec_index = vec_index;
4068 }
4069 else if (vec_index == second_vec_index
4070 || second_vec_index == -1)
4071 {
4072 second_vec_index = vec_index;
4073 mask_element += const_nunits;
4074 }
4075 else
4076 {
4077 if (dump_enabled_p ())
4078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4079 "permutation requires at "
4080 "least three vectors %G",
4081 stmt_info->stmt);
4082 gcc_assert (analyze_only);
4083 return false;
4084 }
4085
4086 gcc_assert (mask_element < 2 * const_nunits);
4087 }
4088
4089 if (mask_element != index)
4090 noop_p = false;
4091 mask[index++] = mask_element;
4092
4093 if (index == count && !noop_p)
4094 {
4095 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
4096 if (!can_vec_perm_const_p (mode, indices))
4097 {
4098 if (dump_enabled_p ())
4099 {
4100 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4101 vect_location,
4102 "unsupported vect permute { ");
4103 for (i = 0; i < count; ++i)
4104 {
4105 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
4106 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
4107 }
4108 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
4109 }
4110 gcc_assert (analyze_only);
4111 return false;
4112 }
4113
4114 ++*n_perms;
4115 }
4116
4117 if (index == count)
4118 {
4119 if (!analyze_only)
4120 {
4121 tree mask_vec = NULL_TREE;
4122
4123 if (! noop_p)
4124 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
4125
4126 if (second_vec_index == -1)
4127 second_vec_index = first_vec_index;
4128
4129 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
4130 {
4131 /* Generate the permute statement if necessary. */
4132 tree first_vec = dr_chain[first_vec_index + ri];
4133 tree second_vec = dr_chain[second_vec_index + ri];
4134 stmt_vec_info perm_stmt_info;
4135 if (! noop_p)
4136 {
4137 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4138 tree perm_dest
4139 = vect_create_destination_var (gimple_assign_lhs (stmt),
4140 vectype);
4141 perm_dest = make_ssa_name (perm_dest);
4142 gassign *perm_stmt
4143 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
4144 first_vec, second_vec,
4145 mask_vec);
4146 perm_stmt_info
4147 = vect_finish_stmt_generation (stmt_info, perm_stmt,
4148 gsi);
4149 }
4150 else
4151 /* If mask was NULL_TREE generate the requested
4152 identity transform. */
4153 perm_stmt_info = vinfo->lookup_def (first_vec);
4154
4155 /* Store the vector statement in NODE. */
4156 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
4157 = perm_stmt_info;
4158 }
4159 }
4160
4161 index = 0;
4162 first_vec_index = -1;
4163 second_vec_index = -1;
4164 noop_p = true;
4165 }
4166 }
4167
4168 return true;
4169 }
4170
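/* Editor's illustrative sketch (not part of this file): the mask
   arithmetic of vect_transform_slp_perm_load above, restricted to at
   most two input vectors per mask (anything not in the first input is
   treated as the second).  Called with GROUP_SIZE == 3,
   DR_GROUP_SIZE == 3, NUNITS == 4, LOAD_PERM == {0,0,0} and
   NELTS == 12 it reproduces the a's masks from the comment above:
   {0,0,0,3} {3,3,6,6} {2,5,5,5}.  */

static void
demo_load_perm_masks (unsigned group_size, unsigned dr_group_size,
		      unsigned nunits, const unsigned *load_perm,
		      unsigned nelts, unsigned *mask_out)
{
  int first_vec = -1;
  unsigned index = 0;
  for (unsigned j = 0; j < nelts; j++)
    {
      /* Scalar lane J wants element LOAD_PERM[J % GROUP_SIZE] of
	 iteration J / GROUP_SIZE of the interleaved group.  */
      unsigned i = (j / group_size) * dr_group_size
		   + load_perm[j % group_size];
      int vec_index = i / nunits;
      unsigned elt = i % nunits;
      if (first_vec == -1)
	first_vec = vec_index;
      else if (vec_index != first_vec)
	elt += nunits;			/* element of the second input */
      mask_out[j] = elt;
      if (++index == nunits)		/* one mask completed */
	{
	  index = 0;
	  first_vec = -1;
	}
    }
}
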
4171 /* Vectorize SLP instance tree in postorder. */
4172
4173 static void
4174 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
4175 {
4176 gimple_stmt_iterator si;
4177 stmt_vec_info stmt_info;
4178 unsigned int group_size;
4179 tree vectype;
4180 int i, j;
4181 slp_tree child;
4182
4183 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4184 return;
4185
4186 /* See if we have already vectorized the node in the graph of the
4187 SLP instance. */
4188 if (SLP_TREE_VEC_STMTS (node).exists ())
4189 return;
4190
4191 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4192 vect_schedule_slp_instance (child, instance);
4193
4194 /* Push SLP node def-type to stmts. */
4195 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4196 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4197 {
4198 stmt_vec_info child_stmt_info;
4199 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4200 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4201 }
4202
4203 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4204
4205 /* VECTYPE is the type of the destination. */
4206 vectype = STMT_VINFO_VECTYPE (stmt_info);
4207 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4208 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4209
4210 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4211 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4212
4213 if (dump_enabled_p ())
4214 dump_printf_loc (MSG_NOTE, vect_location,
4215 "------>vectorizing SLP node starting from: %G",
4216 stmt_info->stmt);
4217
4218 /* Vectorized stmts go before the last scalar stmt which is where
4219 all uses are ready. */
4220 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4221 si = gsi_for_stmt (last_stmt_info->stmt);
4222
4223 /* Handle two-operation SLP nodes by vectorizing the group with
4224 both operations and then performing a merge. */
4225 bool done_p = false;
4226 if (SLP_TREE_TWO_OPERATORS (node))
4227 {
4228 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4229 enum tree_code code0 = gimple_assign_rhs_code (stmt);
4230 enum tree_code ocode = ERROR_MARK;
4231 stmt_vec_info ostmt_info;
4232 vec_perm_builder mask (group_size, group_size, 1);
4233 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4234 {
4235 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4236 if (gimple_assign_rhs_code (ostmt) != code0)
4237 {
4238 mask.quick_push (1);
4239 ocode = gimple_assign_rhs_code (ostmt);
4240 }
4241 else
4242 mask.quick_push (0);
4243 }
4244 if (ocode != ERROR_MARK)
4245 {
4246 vec<stmt_vec_info> v0;
4247 vec<stmt_vec_info> v1;
4248 unsigned j;
4249 tree tmask = NULL_TREE;
4250 vect_transform_stmt (stmt_info, &si, node, instance);
4251 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4252 SLP_TREE_VEC_STMTS (node).truncate (0);
4253 gimple_assign_set_rhs_code (stmt, ocode);
4254 vect_transform_stmt (stmt_info, &si, node, instance);
4255 gimple_assign_set_rhs_code (stmt, code0);
4256 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4257 SLP_TREE_VEC_STMTS (node).truncate (0);
4258 tree meltype = build_nonstandard_integer_type
4259 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4260 tree mvectype = get_same_sized_vectype (meltype, vectype);
4261 unsigned k = 0, l;
4262 for (j = 0; j < v0.length (); ++j)
4263 {
4264 /* Enforced by vect_build_slp_tree, which rejects variable-length
4265 vectors for SLP_TREE_TWO_OPERATORS. */
4266 unsigned int const_nunits = nunits.to_constant ();
4267 tree_vector_builder melts (mvectype, const_nunits, 1);
4268 for (l = 0; l < const_nunits; ++l)
4269 {
4270 if (k >= group_size)
4271 k = 0;
4272 tree t = build_int_cst (meltype,
4273 mask[k++] * const_nunits + l);
4274 melts.quick_push (t);
4275 }
4276 tmask = melts.build ();
4277
4278 /* ??? Not all targets support a VEC_PERM_EXPR with a
4279 constant mask that would translate to a vec_merge RTX
4280 (with their vec_perm_const_ok). We can either not
4281 vectorize in that case or let veclower do its job.
4282 Unfortunately that isn't too great and at least for
4283 plus/minus we'd eventually like to match targets'
4284 vector addsub instructions. */
4285 gimple *vstmt;
4286 vstmt = gimple_build_assign (make_ssa_name (vectype),
4287 VEC_PERM_EXPR,
4288 gimple_assign_lhs (v0[j]->stmt),
4289 gimple_assign_lhs (v1[j]->stmt),
4290 tmask);
4291 SLP_TREE_VEC_STMTS (node).quick_push
4292 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4293 }
4294 v0.release ();
4295 v1.release ();
4296 done_p = true;
4297 }
4298 }
4299 if (!done_p)
4300 vect_transform_stmt (stmt_info, &si, node, instance);
4301
4302 /* Restore stmt def-types. */
4303 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4304 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4305 {
4306 stmt_vec_info child_stmt_info;
4307 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4308 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4309 }
4310 }
4311
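/* Editor's illustrative sketch (not part of this file): the merge mask
   built for SLP_TREE_TWO_OPERATORS above.  Lane L of the VEC_PERM_EXPR
   selector is MASK[K] * NUNITS + L, i.e. lane L of the CODE0 vector
   when the group entry uses CODE0 and lane L of the OCODE vector
   otherwise.  For a {+,-,+,-} group and NUNITS == 4 this yields the
   selector {0, 5, 2, 7}.  */

static void
demo_two_operator_selector (const unsigned char *op_mask,
			    unsigned group_size, unsigned nunits,
			    unsigned *sel)
{
  unsigned k = 0;
  for (unsigned l = 0; l < nunits; ++l)
    {
      if (k >= group_size)
	k = 0;				/* the group repeats within the vector */
      sel[l] = op_mask[k++] * nunits + l;
    }
}
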
4312 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4313 For loop vectorization this is done in vectorizable_call, but for SLP
4314 it needs to be deferred until end of vect_schedule_slp, because multiple
4315 SLP instances may refer to the same scalar stmt. */
4316
4317 static void
4318 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4319 {
4320 gimple *new_stmt;
4321 gimple_stmt_iterator gsi;
4322 int i;
4323 slp_tree child;
4324 tree lhs;
4325 stmt_vec_info stmt_info;
4326
4327 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4328 return;
4329
4330 if (visited.add (node))
4331 return;
4332
4333 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4334 vect_remove_slp_scalar_calls (child, visited);
4335
4336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4337 {
4338 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4339 if (!stmt || gimple_bb (stmt) == NULL)
4340 continue;
4341 if (is_pattern_stmt_p (stmt_info)
4342 || !PURE_SLP_STMT (stmt_info))
4343 continue;
4344 lhs = gimple_call_lhs (stmt);
4345 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4346 gsi = gsi_for_stmt (stmt);
4347 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4348 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4349 }
4350 }
4351
4352 static void
4353 vect_remove_slp_scalar_calls (slp_tree node)
4354 {
4355 hash_set<slp_tree> visited;
4356 vect_remove_slp_scalar_calls (node, visited);
4357 }
4358
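/* Editor's illustrative sketch (not part of this file): the visited-set
   walk used above.  The SLP graph is a DAG (nodes can be shared), so
   each node must be processed exactly once, in postorder after its
   children; here a plain flag stands in for the hash_set.  */

struct demo_node
{
  struct demo_node **children;
  unsigned n_children;
  bool visited;
};

static void
demo_walk_once (struct demo_node *node, void (*process) (struct demo_node *))
{
  if (node->visited)
    return;				/* already handled via another path */
  node->visited = true;
  for (unsigned i = 0; i < node->n_children; ++i)
    demo_walk_once (node->children[i], process);
  process (node);			/* postorder: after the children */
}
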
4359 /* Vectorize the instance root. */
4360
4361 void
4362 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4363 {
4364 gassign *rstmt = NULL;
4365
4366 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4367 {
4368 stmt_vec_info child_stmt_info;
4369 int j;
4370
4371 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4372 {
4373 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4374 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4375 if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4376 TREE_TYPE (vect_lhs)))
4377 vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4378 vect_lhs);
4379 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4380 break;
4381 }
4382 }
4383 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4384 {
4385 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4386 stmt_vec_info child_stmt_info;
4387 int j;
4388 vec<constructor_elt, va_gc> *v;
4389 vec_alloc (v, nelts);
4390
4391 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4392 {
4393 CONSTRUCTOR_APPEND_ELT (v,
4394 NULL_TREE,
4395 gimple_get_lhs (child_stmt_info->stmt));
4396 }
4397 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4398 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4399 tree r_constructor = build_constructor (rtype, v);
4400 rstmt = gimple_build_assign (lhs, r_constructor);
4401 }
4402
4403 gcc_assert (rstmt);
4404
4405 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4406 gsi_replace (&rgsi, rstmt, true);
4407 }
4408
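/* Editor's illustrative sketch (not part of this file): the value
   produced by vectorize_slp_instance_root_stmt above.  With one vector
   statement the root gets a (possibly view-converted) copy of that
   vector; with several, a CONSTRUCTOR concatenating the partial
   vectors, which on plain arrays amounts to:  */

static void
demo_gather_root_value (const int *const *parts, unsigned nparts,
			unsigned part_nelts, int *root)
{
  for (unsigned i = 0; i < nparts; ++i)
    for (unsigned j = 0; j < part_nelts; ++j)
      root[i * part_nelts + j] = parts[i][j];	/* concatenate part I */
}
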
4409 /* Generate vector code for all SLP instances in the loop/basic block. */
4410
4411 void
4412 vect_schedule_slp (vec_info *vinfo)
4413 {
4414 vec<slp_instance> slp_instances;
4415 slp_instance instance;
4416 unsigned int i;
4417
4418 slp_instances = vinfo->slp_instances;
4419 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4420 {
4421 slp_tree node = SLP_INSTANCE_TREE (instance);
4422 /* Schedule the tree of INSTANCE. */
4423 vect_schedule_slp_instance (node, instance);
4424
4425 if (SLP_INSTANCE_ROOT_STMT (instance))
4426 vectorize_slp_instance_root_stmt (node, instance);
4427
4428 if (dump_enabled_p ())
4429 dump_printf_loc (MSG_NOTE, vect_location,
4430 "vectorizing stmts using SLP.\n");
4431 }
4432
4433 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4434 {
4435 slp_tree root = SLP_INSTANCE_TREE (instance);
4436 stmt_vec_info store_info;
4437 unsigned int j;
4438
4439 /* Remove scalar call stmts. Do not do this for basic-block
4440 vectorization as not all uses may be vectorized.
4441 ??? Why should this be necessary? DCE should be able to
4442 remove the stmts itself.
4443 ??? For BB vectorization we can as well remove scalar
4444 stmts starting from the SLP tree root if they have no
4445 uses. */
4446 if (is_a <loop_vec_info> (vinfo))
4447 vect_remove_slp_scalar_calls (root);
4448
4449 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4450 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4451 {
4452 if (!STMT_VINFO_DATA_REF (store_info))
4453 break;
4454
4455 if (SLP_INSTANCE_ROOT_STMT (instance))
4456 continue;
4457
4458 store_info = vect_orig_stmt (store_info);
4459 /* Free the attached stmt_vec_info and remove the stmt. */
4460 vinfo->remove_stmt (store_info);
4461 }
4462 }
4463 }